From 110f7318cc9fa07e1a7176832c40816ae1eda6a4 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Thu, 5 Mar 2026 22:14:25 +0530 Subject: [PATCH] chore: Update backend services, intelligence integration, and documentation --- .../services/intelligence/sif_integration.py | 2 + .../services/intelligence/txtai_service.py | 14 ++- docs/llm_gateway/Architecture.md | 104 ++++++++++++++++++ docs/llm_gateway/Extending_the_Gateway.md | 96 ++++++++++++++++ docs/llm_gateway/Features_and_Status.md | 59 ++++++++++ docs/llm_gateway/Modules.md | 97 ++++++++++++++++ docs/subscription/Billing_and_Usage.md | 71 ++++++++++++ 7 files changed, 439 insertions(+), 4 deletions(-) create mode 100644 docs/llm_gateway/Architecture.md create mode 100644 docs/llm_gateway/Extending_the_Gateway.md create mode 100644 docs/llm_gateway/Features_and_Status.md create mode 100644 docs/llm_gateway/Modules.md create mode 100644 docs/subscription/Billing_and_Usage.md diff --git a/backend/services/intelligence/sif_integration.py b/backend/services/intelligence/sif_integration.py index bbbb12f5..110c41aa 100644 --- a/backend/services/intelligence/sif_integration.py +++ b/backend/services/intelligence/sif_integration.py @@ -925,6 +925,8 @@ class SIFIntegrationService: # Content pillar analysis if self.intelligence_service.is_initialized(): clusters = await self.intelligence_service.cluster(min_score=0.6) + if asyncio.iscoroutine(clusters): + clusters = await clusters insights["content_pillars"] = self._format_clusters_as_pillars(clusters) # Semantic gaps analysis diff --git a/backend/services/intelligence/txtai_service.py b/backend/services/intelligence/txtai_service.py index ed0766e6..430a844b 100644 --- a/backend/services/intelligence/txtai_service.py +++ b/backend/services/intelligence/txtai_service.py @@ -215,6 +215,7 @@ class TxtaiIntelligenceService: logger.error(f"Detected known txtai/faiss IndexIDMap/nprobe incompatibility for user {self.user_id}. 
Attempting re-init with numpy backend fallback...") # Switch to numpy backend which doesn't have this issue self._backend = "numpy" + self._initialized = False self._initialize_embeddings() if self.embeddings: results = self.embeddings.search(query, limit=limit) @@ -270,7 +271,9 @@ class TxtaiIntelligenceService: except AttributeError as ae: if "nprobe" in str(ae): logger.error(f"Detected IndexIDMap nprobe error in similarity for user {self.user_id}. Falling back to numpy backend...") + # Switch to numpy backend which doesn't have this issue self._backend = "numpy" + self._initialized = False self._initialize_embeddings() if self.embeddings: similarity = self.embeddings.similarity(text1, text2) @@ -328,7 +331,7 @@ class TxtaiIntelligenceService: # Check if we have graph functionality available if not hasattr(self.embeddings, 'graph') or not self.embeddings.graph: logger.warning(f"Graph clustering not available for user {self.user_id}. Using fallback clustering.") - return self._fallback_clustering(min_score) + return await self._fallback_clustering(min_score) # Use graph-based clustering if available # Perform a search to get graph structure @@ -338,10 +341,13 @@ class TxtaiIntelligenceService: except AttributeError as ae: if "nprobe" in str(ae): logger.error(f"Detected IndexIDMap nprobe error in cluster for user {self.user_id}. 
Falling back to numpy backend...") + # Force re-initialization with numpy backend to bypass FAISS issue self._backend = "numpy" + self._initialized = False self._initialize_embeddings() if self.embeddings: - graph_results = self.embeddings.search(sample_query, limit=10, graph=True) + # Retry with numpy backend (no graph support, so fallback) + return await self._fallback_clustering(min_score) else: raise ae else: @@ -349,7 +355,7 @@ class TxtaiIntelligenceService: if not graph_results: logger.warning(f"No graph results for clustering user {self.user_id}") - return self._fallback_clustering(min_score) + return await self._fallback_clustering(min_score) # Extract clusters from graph results clusters = self._extract_clusters_from_graph(graph_results, min_score) @@ -377,7 +383,7 @@ class TxtaiIntelligenceService: logger.error(f"Clustering failed for user {self.user_id}: {e}") logger.error(f"Min score: {min_score}") logger.error(f"Full traceback: {traceback.format_exc()}") - return self._fallback_clustering(min_score) + return await self._fallback_clustering(min_score) async def _fallback_clustering(self, min_score: float) -> List[List[int]]: """Fallback clustering method when graph clustering is not available.""" diff --git a/docs/llm_gateway/Architecture.md b/docs/llm_gateway/Architecture.md new file mode 100644 index 00000000..cf9e98bb --- /dev/null +++ b/docs/llm_gateway/Architecture.md @@ -0,0 +1,104 @@ +# ALwrity LLM Gateway – Architecture Overview + +ALwrity’s LLM Gateway lives under [llm_providers](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers) and provides a consistent, production‑oriented interface for text, image, audio, and video generation across multiple model providers. It encapsulates provider differences, applies subscription enforcement, and centralizes observability and reliability patterns. 
+ +## Goals +- Unified surface for LLM operations across providers +- Strong subscription enforcement and cost awareness +- Resilient calls with retries and structured error handling +- Extensible provider architecture with clear contracts +- Transparent metrics, usage logging, and pricing integration + +## High‑Level Flow +1. Entry points route requests to the appropriate capability: + - Text generation via [main_text_generation.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/main_text_generation.py) + - Image generation and editing via [image_generation](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation) + - Video generation via [video_generation](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/video_generation) + - Audio/STT via [audio_to_text_generation](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/audio_to_text_generation) +2. Subscription enforcement integrates before provider calls: + - Uses PricingService and UsageTrackingService to validate tokens/operations + - Blocks requests that exceed limits with actionable error payloads +3. Provider module performs the call with provider‑specific SDKs/APIs +4. 
Results are normalized to ALwrity types and returned upstream + +## Core Components +- **Text Generation Entry**: [main_text_generation.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/main_text_generation.py) + - Detects available providers via APIKeyManager + - Applies strict subscription checks using PricingService and UsageTrackingService + - Routes to Gemini or Hugging Face implementations +- **Image Generation Contracts**: [base.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/base.py) + - Options and Result dataclasses + - Protocols for generation, edit, and face‑swap providers +- **Video Generation Contracts**: [base.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/video_generation/base.py) + - Options and Result dataclasses + - Async protocol with progress callbacks +- **Provider Implementations**: + - Gemini text: [gemini_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/gemini_provider.py) + - Hugging Face text: [huggingface_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/huggingface_provider.py) + - Hugging Face image: [hf_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/hf_provider.py) + - WaveSpeed video: [wavespeed_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/video_generation/wavespeed_provider.py) + +## Provider Abstraction +- Image providers conform to: + - ImageGenerationProvider.generate(options) -> ImageGenerationResult + - ImageEditProvider.edit(options) -> ImageGenerationResult + - FaceSwapProvider.swap_face(options) -> ImageGenerationResult +- Video providers conform to: + - VideoGenerationProvider.generate_video(options, progress_cb) -> VideoGenerationResult + +These contracts ensure consistent options/result types so downstream UI and 
logging remain stable regardless of provider. + +## Subscription Enforcement +- Performed in the text pipeline entry point before any provider call: + - See enforcement and usage checks in [main_text_generation.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/main_text_generation.py#L117-L166) +- Preflight operations endpoint also validates multi‑operation cost/limits: + - See [preflight.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/api/subscription/routes/preflight.py) +- Image/video modules typically rely on the calling route to validate limits first, then perform provider calls. + +## Configuration and Secrets +- Gemini: GEMINI_API_KEY + - Loaded and validated in [gemini_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/gemini_provider.py#L101-L116) +- Hugging Face: HF_TOKEN + - Loaded and validated in [huggingface_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/huggingface_provider.py#L90-L105) +- Hugging Face image defaults: HF_IMAGE_MODEL + - Used in [image_generation/hf_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/hf_provider.py#L17-L21) +- Provider clients must never log secrets; logs are provider‑scoped via get_service_logger. 
+ +## Reliability and Error Handling +- Exponential backoff retries using tenacity: + - Gemini text: [gemini_text_response](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/gemini_provider.py#L117) + - Hugging Face text: [huggingface_text_response](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/huggingface_provider.py#L106) +- Structured exceptions surface HTTP 429 for limit breaches with usage info +- Provider modules return normalized results; callers handle downstream persistence and telemetry + +## Pricing and Cost Awareness +- Preflight cost estimation computes operation costs per provider/model: + - See multi‑operation handling in [preflight.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/api/subscription/routes/preflight.py#L100-L144) +- Video cost calculation is provider/model aware: + - See WaveSpeed services and `calculate_cost` in [video_generation/wavespeed_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/video_generation/wavespeed_provider.py#L44-L56) + +## Observability +- Service‑scoped loggers for each provider/module +- Central usage logs recorded via subscription services on the calling routes +- Provider metadata normalized in result objects for consistent analytics + +## Extensibility Guidelines +- Implement the appropriate Protocol interface in a new provider module +- Normalize options and results to the gateway dataclasses +- Keep environment/key validation local to the provider module +- Add cost mapping in PricingService and preflight for new operations/models +- Wire subscription validation in the calling route before invoking provider + +## Request Lifecycle (Text) +1. Client submits prompt to text endpoint +2. Entry point determines provider (env or APIKeyManager) and validates subscription limits +3. Provider‑specific function executes with retries and returns normalized text +4. 
Caller logs usage and returns response to client + +## Request Lifecycle (Media) +1. Client submits generation/edit/face‑swap request +2. Route validates plan limits (tokens, requests, or per‑operation limits) +3. Provider service executes call and produces normalized binary payload and metadata +4. Caller logs usage and returns media/links to client + +This architecture isolates provider variability while standardizing contracts, enabling safe expansion to new models and modalities without destabilizing upstream consumers. diff --git a/docs/llm_gateway/Extending_the_Gateway.md b/docs/llm_gateway/Extending_the_Gateway.md new file mode 100644 index 00000000..ff969e3d --- /dev/null +++ b/docs/llm_gateway/Extending_the_Gateway.md @@ -0,0 +1,96 @@ +# Extending the LLM Gateway + +This guide provides a checklist and templates for adding new providers or modalities to the ALwrity LLM Gateway. + +## Checklist + +1. **Define the Provider Interface**: + - [ ] Create a new module in `backend/services/llm_providers/<modality>/` (e.g., `image_generation/`). + - [ ] Define an input options dataclass (e.g., `MyNewProviderOptions`). + - [ ] Implement the standard Protocol (e.g., `ImageGenerationProvider`). + +2. **Configuration**: + - [ ] Add the necessary API keys to `.env.example` and `APIKeyManager`. + - [ ] Add the new provider to the `APIProvider` enum in `backend/models/subscription_models.py`. + +3. **Pricing & Usage**: + - [ ] Add default pricing in `PricingService` or a migration script. + - [ ] Ensure the `UsageSummary` table has columns for this provider (if it's a major one) or map it to a generic category. + +4. **Integration**: + - [ ] Register the provider in the main entry point (e.g., `main_image_generation.py`). + - [ ] Update `preflight.py` to handle cost estimation for this provider. + +5. **Frontend**: + - [ ] Update `billingService.ts` to handle the new provider key in usage stats (if applicable). + - [ ] Add provider icon/color in `billingService.ts`. 
+ +## Skeleton Template (Python) + +Here is a template for a new **Image Generation Provider**: + +```python +from __future__ import annotations +import os +from typing import Optional, Dict, Any +from dataclasses import dataclass +from .base import ImageGenerationOptions, ImageGenerationResult, ImageGenerationProvider +from utils.logger_utils import get_service_logger + +logger = get_service_logger("image_generation.my_new_provider") + +class MyNewProvider(ImageGenerationProvider): + """ + My New Provider implementation. + """ + + def __init__(self, api_key: Optional[str] = None): + self.api_key = api_key or os.getenv("MY_PROVIDER_API_KEY") + if not self.api_key: + raise RuntimeError("MY_PROVIDER_API_KEY is required") + # Initialize client here + + def generate(self, options: ImageGenerationOptions) -> ImageGenerationResult: + logger.info(f"Generating image with MyNewProvider: {options.prompt[:50]}...") + + try: + # 1. Call External API + # response = client.generate(...) + + # 2. Process Response (Mock) + image_bytes = b"fake_image_data" + width = options.width + height = options.height + + # 3. Return Standard Result + return ImageGenerationResult( + image_bytes=image_bytes, + width=width, + height=height, + provider="my_new_provider", + model=options.model or "default-model", + seed=options.seed, + metadata={"raw_response": "..."} + ) + + except Exception as e: + logger.error(f"Generation failed: {e}") + raise +``` + +## Skeleton Template (Route Integration) + +In `main_image_generation.py`: + +```python +from .image_generation.my_new_provider import MyNewProvider + +def generate_image(prompt: str, provider: str, ...): + # ... existing code ... + + if provider == "my_new_provider": + service = MyNewProvider() + result = service.generate(options) + + # ... existing code ... 
+``` diff --git a/docs/llm_gateway/Features_and_Status.md b/docs/llm_gateway/Features_and_Status.md new file mode 100644 index 00000000..e8250466 --- /dev/null +++ b/docs/llm_gateway/Features_and_Status.md @@ -0,0 +1,59 @@ +# LLM Gateway – Features & Implementation Status + +This document provides a high-level overview of the LLM Gateway's capabilities and the current production status of each component. + +## Core Features + +- **Unified Interface**: Single API surface for text, image, video, and audio generation, abstracting away provider-specific SDKs. +- **Provider Agnostic**: Switch between Gemini, Hugging Face, Stability, WaveSpeed, etc., via configuration or runtime parameters. +- **Subscription Enforcement**: Strict pre-flight checks against user plans (Free, Basic, Pro, Enterprise) before any API call. +- **Cost Awareness**: Granular tracking of input/output tokens, request counts, and media generation costs per provider/model. +- **Resilience**: Built-in retries (exponential backoff) for transient failures (rate limits, timeouts). +- **Observability**: Centralized logging (`APIUsageLog`) and usage aggregation (`UsageSummary`) for all modalities. +- **Streaming Support**: (Partial) Infrastructure exists for text streaming, though primarily used for blocking responses currently. + +## Implementation Status + +### 1. Text Generation +| Feature | Provider | Status | Notes | +| :--- | :--- | :--- | :--- | +| **Chat/Completion** | Google Gemini | ✅ Production | Default provider. Supports `gemini-2.0-flash`. | +| **Chat/Completion** | Hugging Face | ✅ Production | via Inference Providers (e.g., `mistralai/Mistral-7B`). | +| **Structured JSON** | Gemini | ✅ Production | Uses `response_schema` for reliable parsing. | +| **Structured JSON** | Hugging Face | ✅ Production | Uses `response_format={ "type": "json_object" }`. | + +### 2. 
Image Generation +| Feature | Provider | Status | Notes | +| :--- | :--- | :--- | :--- | +| **Text-to-Image** | Google Gemini | ✅ Production | Imagen 3 models. | +| **Text-to-Image** | Hugging Face | ✅ Production | FLUX.1 via fal-ai/Black Forest Labs. | +| **Text-to-Image** | Stability AI | ✅ Production | Core/SD3 models. | +| **Text-to-Image** | WaveSpeed | ✅ Production | High-speed generation. | +| **Image Editing** | WaveSpeed | ✅ Production | Inpainting, background removal, face swap. | + +### 3. Video Generation +| Feature | Provider | Status | Notes | +| :--- | :--- | :--- | :--- | +| **Text-to-Video** | WaveSpeed | ✅ Production | HunyuanVideo-1.5, LTX-2 Pro. | +| **Image-to-Video** | WaveSpeed | 🚧 Planned | Roadmap item. | + +### 4. Audio Generation +| Feature | Provider | Status | Notes | +| :--- | :--- | :--- | :--- | +| **Text-to-Speech** | Gemini | ✅ Production | Audio generation capability. | +| **Text-to-Speech** | WaveSpeed | ✅ Production | Fast TTS. | +| **Speech-to-Text** | Gemini | ✅ Production | Transcription (via `audio_to_text_generation`). | + +### 5. Research & Tools +| Feature | Provider | Status | Notes | +| :--- | :--- | :--- | :--- | +| **Web Search** | Tavily | ✅ Production | Integrated for grounded research. | +| **Web Search** | Serper | ✅ Production | Google Search API alternative. | +| **Web Search** | Exa | ✅ Production | Neural search. | + +## Roadmap & Next Steps + +- **Streaming Standardization**: Unify streaming interfaces across all text providers for consistent frontend UX. +- **Model Fallbacks**: Automatic failover to secondary providers if the primary is down (currently manual/env-based). +- **Fine-tuning Support**: Add gateway endpoints for triggering fine-tuning jobs and using the resulting fine-tuned models. +- **Caching Layer**: Redis-based semantic caching for frequent queries to reduce costs. 
diff --git a/docs/llm_gateway/Modules.md b/docs/llm_gateway/Modules.md new file mode 100644 index 00000000..c25e1def --- /dev/null +++ b/docs/llm_gateway/Modules.md @@ -0,0 +1,97 @@ +# LLM Gateway – Module Reference + +This document catalogs the modules under [llm_providers](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers) with their responsibilities, key classes/functions, configuration, and integration points. + +## Text Generation +- **Entry point**: [main_text_generation.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/main_text_generation.py) + - llm_text_gen(prompt, system_prompt, json_struct, user_id) + - Responsibilities: + - Resolve provider (env or APIKeyManager) + - Perform strict subscription checks (PricingService, UsageTrackingService) + - Call Gemini or Hugging Face implementations + - Integration: + - models.subscription_models.APIProvider mapping + - services.subscription.PricingService, UsageTrackingService + +- **Gemini provider**: [gemini_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/gemini_provider.py) + - get_gemini_api_key() – env validation + - gemini_text_response(...) – tenacity‑backed retries, text output + - gemini_structured_json_response(...) – structured JSON output + - Config: GEMINI_API_KEY + - SDK: google.generativeai + +- **Hugging Face provider**: [huggingface_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/huggingface_provider.py) + - get_huggingface_api_key() – env validation + - huggingface_text_response(...) – Responses API (OpenAI client), retries + - huggingface_structured_json_response(...) 
– structured JSON output + - Config: HF_TOKEN + - SDK: openai client pointed at Hugging Face router + +## Image Generation +- **Contracts**: [image_generation/base.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/base.py) + - ImageGenerationOptions, ImageGenerationResult + - ImageEditOptions, FaceSwapOptions (with to_dict helpers) + - Protocols: ImageGenerationProvider, ImageEditProvider, FaceSwapProvider + +- **Hugging Face image**: [image_generation/hf_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/hf_provider.py) + - Class: HuggingFaceImageProvider(ImageGenerationProvider) + - generate(options) -> ImageGenerationResult + - Config: HF_TOKEN, HF_IMAGE_MODEL (default FLUX.1‑Krea‑dev) + - SDK: huggingface_hub.InferenceClient (provider="fal-ai") + +- **Other image modules**: + - [image_generation/gemini_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/gemini_provider.py) – Gemini image generation integration + - [image_generation/wavespeed_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/wavespeed_provider.py) – WaveSpeed image editing + - [image_generation/wavespeed_face_swap_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/wavespeed_face_swap_provider.py) – Face swap + - [image_generation/wavespeed_edit_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/image_generation/wavespeed_edit_provider.py) – General edits + +## Video Generation +- **Contracts**: [video_generation/base.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/video_generation/base.py) + - VideoGenerationOptions, VideoGenerationResult + - Protocol: VideoGenerationProvider (async, progress callbacks) + +- **WaveSpeed video**: 
[video_generation/wavespeed_provider.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/video_generation/wavespeed_provider.py) + - BaseWaveSpeedTextToVideoService: + - MODEL_NAME/PATH contract + - calculate_cost(resolution, duration) + - input validation helpers + - Model services (e.g., HunyuanVideoService, LTX‑2 variants) + - Client: services.wavespeed.client.WaveSpeedClient + +## Audio / STT +- **Modules**: + - [audio_to_text_generation/gemini_audio_text.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/audio_to_text_generation/gemini_audio_text.py) + - [audio_to_text_generation/stt_audio_blog.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/services/llm_providers/audio_to_text_generation/stt_audio_blog.py) + - Responsibilities: + - Convert audio to text + - Provide structured outputs for downstream blog/content workflows + +## Shared Patterns +- **Environment handling**: + - Providers validate their own secrets and default models + - No secrets logged; provider‑scoped logger via utils.logger_utils.get_service_logger +- **Result normalization**: + - Binary payloads (image_bytes, video_bytes) and metadata are standardized + - Provider name/model surfaced in result for analytics +- **Retries and resilience**: + - Text providers use tenacity exponential backoff + - Media providers implement validation and sensible defaults + +## Integration Points +- Subscription enforcement and preflight: + - [api/subscription/routes/preflight.py](file:///C:/Users/diksha%20rawat/Desktop/ALwrity/backend/api/subscription/routes/preflight.py) + - PricingService/UsageTrackingService are invoked prior to calling providers +- Usage logging: + - Centralized in subscription services; gateway returns normalized data for logging +- Pricing: + - Per‑provider and per‑model costs reflected in preflight and service layers + +## Extending the Gateway +1. Choose modality (text/image/video/audio) +2. 
Implement the appropriate Protocol and dataclasses +3. Validate and load configuration from environment +4. Normalize outputs to gateway result types +5. Add pricing/preflight entries and update subscription limit checks +6. Add route handlers that perform validation then call the new provider + +Following this reference ensures new providers integrate smoothly with ALwrity’s subscription, pricing, and analytics subsystems while keeping UI/API stable across diverse models. diff --git a/docs/subscription/Billing_and_Usage.md b/docs/subscription/Billing_and_Usage.md new file mode 100644 index 00000000..5f751c7d --- /dev/null +++ b/docs/subscription/Billing_and_Usage.md @@ -0,0 +1,71 @@ +# Subscription, Billing & Usage Tracking + +This document details how ALwrity manages subscriptions, processes payments via Stripe, and tracks granular usage for every user interaction. + +## 1. Subscription Model + +ALwrity uses a **tier-based subscription model** enforced at the API gateway level. + +### Tiers +- **Free**: Limited access, community support. +- **Basic**: Entry-level AI usage, standard support. +- **Pro**: High limits, advanced models (Gemini Pro, FLUX), priority support. +- **Enterprise**: Custom limits, dedicated infrastructure. + +### Data Model (`UserSubscription`) +Stored in the user's SQLite database (`alwrity_user_{id}.db`): +- `stripe_customer_id`: Link to Stripe Customer. +- `stripe_subscription_id`: Active subscription ID. +- `plan_id`: Internal plan reference (linked to `SubscriptionPlan`). +- `status`: `active`, `past_due`, `canceled`, etc. +- `current_period_start` / `end`: Defines the billing cycle window. + +## 2. Billing Integration (Stripe) + +We use **Stripe** for all payments, webhooks, and portal management. + +### Key Components +- **StripeService**: Handles checkout creation, portal sessions, and webhooks. +- **Webhooks**: Listens for events like `invoice.payment_succeeded`, `customer.subscription.updated`. 
+ - **Idempotency**: All webhooks are tracked in `ProcessedStripeEvent` to prevent duplicate processing. + - **Reliability**: Events are processed transactionally; failures are logged and retried by Stripe. +- **Configuration**: Plan-to-Price mapping is loaded from environment variables (`STRIPE_PLAN_PRICE_MAPPING_TEST` / `STRIPE_PLAN_PRICE_MAPPING_LIVE`) to keep the code in sync with the Stripe Dashboard. + +### Checkout Flow +1. Frontend calls `/api/subscription/create-checkout-session`. +2. Backend validates the user and creates a Stripe Checkout Session. +3. User pays on Stripe. +4. Stripe sends the `checkout.session.completed` webhook. +5. Backend provisions the subscription and credits in `UserSubscription`. + +## 3. Usage Tracking + +Every API call to an LLM provider is tracked, costed, and logged. + +### Tracking Flow +1. **Pre-flight Check** (`check_usage_limits`): + - Before generating content, the system estimates cost/tokens. + - If the user exceeds plan limits (e.g., "50 videos/month"), the request is rejected with HTTP 429. +2. **Execution**: The provider generates the content. +3. **Post-execution Log** (`track_usage`): + - Actual tokens/duration are measured. + - Cost is calculated based on the `APIProviderPricing` table. + - An entry is added to `APIUsageLog` (granular) and aggregated into `UsageSummary` (monthly totals). + +### Database Tables +- **`APIUsageLog`**: Immutable ledger of every call. + - Fields: `user_id`, `provider`, `model`, `input_tokens`, `output_tokens`, `cost`, `status_code`. +- **`UsageSummary`**: Aggregated stats per billing period. + - Fields: `total_calls`, `total_cost`, `gemini_calls`, `video_calls`, etc. + - **Unique Constraint**: Enforced on `(user_id, billing_period)` to prevent data drift. + +### Pricing Engine (`PricingService`) +- Costs are not hardcoded. They are fetched from the `APIProviderPricing` table. +- Supports per-token (text), per-image (media), and per-second (video/audio) pricing models. +- Admins can update pricing in the database without redeploying code. + +## 4. 
Frontend Integration + +- **Usage Dashboard**: Visualizes consumption vs. limits. +- **Real-time**: Usage stats are typically updated immediately after generation. +- **Limit Rings**: UI components show percentage used (e.g., "80% of monthly video limit").