Files
moreminimore-marketing/backend/models/stability_models.py
Kunthawat Greethong c35fa52117 Base code
2026-01-08 22:39:53 +07:00

474 lines
21 KiB
Python

"""Pydantic models for Stability AI API requests and responses."""
from pydantic import BaseModel, Field
from typing import Optional, List, Union, Literal, Tuple
from enum import Enum
# ==================== ENUMS ====================
class OutputFormat(str, Enum):
    """String-valued enumeration of image output formats (jpeg, png, webp)."""

    JPEG = "jpeg"
    PNG = "png"
    WEBP = "webp"
class AudioOutputFormat(str, Enum):
    """String-valued enumeration of audio output formats (mp3, wav)."""

    MP3 = "mp3"
    WAV = "wav"
class AspectRatio(str, Enum):
    """String-valued enumeration of aspect ratios, written as "W:H"."""

    # Landscape ratios, widest first.
    RATIO_21_9 = "21:9"
    RATIO_16_9 = "16:9"
    RATIO_3_2 = "3:2"
    RATIO_5_4 = "5:4"
    # Square.
    RATIO_1_1 = "1:1"
    # Portrait ratios, tallest last.
    RATIO_4_5 = "4:5"
    RATIO_2_3 = "2:3"
    RATIO_9_16 = "9:16"
    RATIO_9_21 = "9:21"
class StylePreset(str, Enum):
    """String-valued enumeration of style preset identifiers."""

    ENHANCE = "enhance"
    ANIME = "anime"
    PHOTOGRAPHIC = "photographic"
    DIGITAL_ART = "digital-art"
    COMIC_BOOK = "comic-book"
    FANTASY_ART = "fantasy-art"
    LINE_ART = "line-art"
    ANALOG_FILM = "analog-film"
    NEON_PUNK = "neon-punk"
    ISOMETRIC = "isometric"
    LOW_POLY = "low-poly"
    ORIGAMI = "origami"
    MODELING_COMPOUND = "modeling-compound"
    CINEMATIC = "cinematic"
    # Member name cannot start with a digit, hence THREE_D_MODEL for "3d-model".
    THREE_D_MODEL = "3d-model"
    PIXEL_ART = "pixel-art"
    TILE_TEXTURE = "tile-texture"
class FinishReason(str, Enum):
    """String-valued enumeration of the reasons a generation finished."""

    SUCCESS = "SUCCESS"
    CONTENT_FILTERED = "CONTENT_FILTERED"
class GenerationMode(str, Enum):
    """String-valued enumeration of SD3 generation modes."""

    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_IMAGE = "image-to-image"
class SD3Model(str, Enum):
    """String-valued enumeration of SD3 model variant identifiers."""

    SD3_5_LARGE = "sd3.5-large"
    SD3_5_LARGE_TURBO = "sd3.5-large-turbo"
    SD3_5_MEDIUM = "sd3.5-medium"
class AudioModel(str, Enum):
    """String-valued enumeration of audio model variant identifiers."""

    STABLE_AUDIO_2_5 = "stable-audio-2.5"
    STABLE_AUDIO_2 = "stable-audio-2"
class TextureResolution(str, Enum):
    """String-valued enumeration of 3D texture resolutions.

    The values are the strings "512", "1024" and "2048", not integers.
    """

    RES_512 = "512"
    RES_1024 = "1024"
    RES_2048 = "2048"
class RemeshType(str, Enum):
    """String-valued enumeration of 3D remesh options."""

    NONE = "none"
    TRIANGLE = "triangle"
    QUAD = "quad"
class TargetType(str, Enum):
    """String-valued enumeration of 3D mesh simplification target types."""

    NONE = "none"
    VERTEX = "vertex"
    FACE = "face"
class LightSourceDirection(str, Enum):
    """String-valued enumeration of light source directions."""

    LEFT = "left"
    RIGHT = "right"
    ABOVE = "above"
    BELOW = "below"
class InpaintMode(str, Enum):
    """String-valued enumeration of inpainting modes."""

    SEARCH = "search"
    MASK = "mask"
# ==================== BASE MODELS ====================
class BaseStabilityRequest(BaseModel):
    """Common request base carrying a bounded seed and an image output format."""

    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967294, description="Random seed for generation"
    )
    output_format: Optional[OutputFormat] = Field(
        default=OutputFormat.PNG, description="Output image format"
    )
class BaseImageRequest(BaseStabilityRequest):
    """Image request base: extends the common base with an optional negative prompt."""

    negative_prompt: Optional[str] = Field(
        default=None, max_length=10000, description="What you do not want to see"
    )
# ==================== GENERATE MODELS ====================
class StableImageUltraRequest(BaseImageRequest):
    """Parameters for a Stable Image Ultra generation request.

    ``prompt`` is mandatory (1-10000 characters); every other field has a default.
    """

    prompt: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="Text prompt for image generation",
    )
    aspect_ratio: Optional[AspectRatio] = Field(
        default=AspectRatio.RATIO_1_1, description="Aspect ratio"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
    strength: Optional[float] = Field(
        default=None,
        ge=0,
        le=1,
        description="Image influence strength (required if image provided)",
    )
class StableImageCoreRequest(BaseImageRequest):
    """Parameters for a Stable Image Core generation request.

    ``prompt`` is mandatory (1-10000 characters); aspect ratio defaults to 1:1.
    """

    prompt: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="Text prompt for image generation",
    )
    aspect_ratio: Optional[AspectRatio] = Field(
        default=AspectRatio.RATIO_1_1, description="Aspect ratio"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class StableSD3Request(BaseImageRequest):
    """Parameters for a Stable Diffusion 3.5 generation request.

    Defaults select text-to-image mode, a 1:1 aspect ratio and the
    ``sd3.5-large`` model; ``strength`` and ``cfg_scale`` are unset by default.
    """

    prompt: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="Text prompt for image generation",
    )
    mode: Optional[GenerationMode] = Field(
        default=GenerationMode.TEXT_TO_IMAGE, description="Generation mode"
    )
    aspect_ratio: Optional[AspectRatio] = Field(
        default=AspectRatio.RATIO_1_1,
        description="Aspect ratio (text-to-image only)",
    )
    model: Optional[SD3Model] = Field(
        default=SD3Model.SD3_5_LARGE, description="SD3 model variant"
    )
    strength: Optional[float] = Field(
        default=None,
        ge=0,
        le=1,
        description="Image influence strength (image-to-image only)",
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
    cfg_scale: Optional[float] = Field(
        default=None, ge=1, le=10, description="CFG scale"
    )
# ==================== EDIT MODELS ====================
class EraseRequest(BaseStabilityRequest):
    """Parameters for an image erase request; mask growth is limited to 0-20."""

    grow_mask: Optional[float] = Field(
        default=5, ge=0, le=20, description="Mask edge growth in pixels"
    )
class InpaintRequest(BaseImageRequest):
    """Parameters for an image inpainting request; mask growth is limited to 0-100."""

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for inpainting"
    )
    grow_mask: Optional[float] = Field(
        default=5, ge=0, le=100, description="Mask edge growth in pixels"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class OutpaintRequest(BaseStabilityRequest):
    """Parameters for an image outpainting request.

    Each of the four directions accepts 0-2000 pixels and defaults to 0.
    The prompt is optional and defaults to an empty string.
    """

    left: Optional[int] = Field(
        default=0, ge=0, le=2000, description="Pixels to outpaint left"
    )
    right: Optional[int] = Field(
        default=0, ge=0, le=2000, description="Pixels to outpaint right"
    )
    up: Optional[int] = Field(
        default=0, ge=0, le=2000, description="Pixels to outpaint up"
    )
    down: Optional[int] = Field(
        default=0, ge=0, le=2000, description="Pixels to outpaint down"
    )
    creativity: Optional[float] = Field(
        default=0.5, ge=0, le=1, description="Creativity level"
    )
    prompt: Optional[str] = Field(
        default="", max_length=10000, description="Text prompt for outpainting"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class SearchAndReplaceRequest(BaseImageRequest):
    """Parameters for a search-and-replace request.

    Both ``prompt`` and ``search_prompt`` are mandatory.
    """

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for replacement"
    )
    search_prompt: str = Field(
        ..., max_length=10000, description="What to search for"
    )
    grow_mask: Optional[float] = Field(
        default=3, ge=0, le=20, description="Mask edge growth in pixels"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class SearchAndRecolorRequest(BaseImageRequest):
    """Parameters for a search-and-recolor request.

    Both ``prompt`` and ``select_prompt`` are mandatory.
    """

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for recoloring"
    )
    select_prompt: str = Field(
        ..., max_length=10000, description="What to select for recoloring"
    )
    grow_mask: Optional[float] = Field(
        default=3, ge=0, le=20, description="Mask edge growth in pixels"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class RemoveBackgroundRequest(BaseStabilityRequest):
    """Parameters for a background removal request; adds no fields of its own."""

    # Only requires image and output_format
class ReplaceBackgroundAndRelightRequest(BaseImageRequest):
    """Parameters for a background replacement and relighting request.

    ``subject_image`` is the only mandatory field; all tuning knobs are
    floats constrained to the 0-1 range.
    """

    subject_image: bytes = Field(..., description="Subject image binary data")
    background_prompt: Optional[str] = Field(
        default=None, max_length=10000, description="Background description"
    )
    foreground_prompt: Optional[str] = Field(
        default=None, max_length=10000, description="Subject description"
    )
    preserve_original_subject: Optional[float] = Field(
        default=0.6, ge=0, le=1, description="Subject preservation"
    )
    original_background_depth: Optional[float] = Field(
        default=0.5, ge=0, le=1, description="Background depth matching"
    )
    keep_original_background: Optional[bool] = Field(
        default=False, description="Keep original background"
    )
    light_source_direction: Optional[LightSourceDirection] = Field(
        default=None, description="Light direction"
    )
    light_source_strength: Optional[float] = Field(
        default=0.3, ge=0, le=1, description="Light strength"
    )
# ==================== UPSCALE MODELS ====================
class FastUpscaleRequest(BaseStabilityRequest):
    """Parameters for a fast upscale request; adds no fields of its own."""

    # Only requires image and output_format
class ConservativeUpscaleRequest(BaseImageRequest):
    """Parameters for a conservative upscale request; creativity is limited to 0.2-0.5."""

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for upscaling"
    )
    creativity: Optional[float] = Field(
        default=0.35, ge=0.2, le=0.5, description="Creativity level"
    )
class CreativeUpscaleRequest(BaseImageRequest):
    """Parameters for a creative upscale request; creativity is limited to 0.1-0.5."""

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for upscaling"
    )
    creativity: Optional[float] = Field(
        default=0.3, ge=0.1, le=0.5, description="Creativity level"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
# ==================== CONTROL MODELS ====================
class SketchControlRequest(BaseImageRequest):
    """Parameters for a sketch control request; control strength defaults to 0.7."""

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for generation"
    )
    control_strength: Optional[float] = Field(
        default=0.7, ge=0, le=1, description="Control strength"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class StructureControlRequest(BaseImageRequest):
    """Parameters for a structure control request; control strength defaults to 0.7."""

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for generation"
    )
    control_strength: Optional[float] = Field(
        default=0.7, ge=0, le=1, description="Control strength"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class StyleControlRequest(BaseImageRequest):
    """Parameters for a style control request; fidelity defaults to 0.5."""

    prompt: str = Field(
        ..., min_length=1, max_length=10000, description="Text prompt for generation"
    )
    aspect_ratio: Optional[AspectRatio] = Field(
        default=AspectRatio.RATIO_1_1, description="Aspect ratio"
    )
    fidelity: Optional[float] = Field(
        default=0.5, ge=0, le=1, description="Style fidelity"
    )
    style_preset: Optional[StylePreset] = Field(default=None, description="Style preset")
class StyleTransferRequest(BaseImageRequest):
    """Parameters for a style transfer request.

    Unlike most sibling models the prompt is optional here and defaults
    to an empty string.
    """

    prompt: Optional[str] = Field(
        default="", max_length=10000, description="Text prompt for generation"
    )
    style_strength: Optional[float] = Field(
        default=1, ge=0, le=1, description="Style strength"
    )
    composition_fidelity: Optional[float] = Field(
        default=0.9, ge=0, le=1, description="Composition fidelity"
    )
    change_strength: Optional[float] = Field(
        default=0.9, ge=0.1, le=1, description="Change strength"
    )
# ==================== 3D MODELS ====================
class StableFast3DRequest(BaseStabilityRequest):
    """Parameters for a Stable Fast 3D request.

    ``vertex_count`` accepts -1 (its default) through 20000.
    """

    texture_resolution: Optional[TextureResolution] = Field(
        default=TextureResolution.RES_1024, description="Texture resolution"
    )
    foreground_ratio: Optional[float] = Field(
        default=0.85, ge=0.1, le=1, description="Foreground ratio"
    )
    remesh: Optional[RemeshType] = Field(
        default=RemeshType.NONE, description="Remesh algorithm"
    )
    vertex_count: Optional[int] = Field(
        default=-1, ge=-1, le=20000, description="Target vertex count"
    )
class StablePointAware3DRequest(BaseStabilityRequest):
    """Parameters for a Stable Point Aware 3D request.

    ``foreground_ratio`` is constrained to 1-2 here (default 1.3).
    """

    texture_resolution: Optional[TextureResolution] = Field(
        default=TextureResolution.RES_1024, description="Texture resolution"
    )
    foreground_ratio: Optional[float] = Field(
        default=1.3, ge=1, le=2, description="Foreground ratio"
    )
    remesh: Optional[RemeshType] = Field(
        default=RemeshType.NONE, description="Remesh algorithm"
    )
    target_type: Optional[TargetType] = Field(
        default=TargetType.NONE, description="Target type"
    )
    target_count: Optional[int] = Field(
        default=1000, ge=100, le=20000, description="Target count"
    )
    guidance_scale: Optional[float] = Field(
        default=3, ge=1, le=10, description="Guidance scale"
    )
# ==================== AUDIO MODELS ====================
class TextToAudioRequest(BaseModel):
    """Parameters for a text-to-audio generation request.

    ``steps`` carries no bounds in this model; its description marks it
    as model-dependent.
    """

    prompt: str = Field(..., max_length=10000, description="Audio generation prompt")
    duration: Optional[float] = Field(
        default=190, ge=1, le=190, description="Duration in seconds"
    )
    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967294, description="Random seed"
    )
    steps: Optional[int] = Field(
        default=None, description="Sampling steps (model-dependent)"
    )
    cfg_scale: Optional[float] = Field(
        default=None, ge=1, le=25, description="CFG scale"
    )
    model: Optional[AudioModel] = Field(
        default=AudioModel.STABLE_AUDIO_2, description="Audio model"
    )
    output_format: Optional[AudioOutputFormat] = Field(
        default=AudioOutputFormat.MP3, description="Output format"
    )
class AudioToAudioRequest(BaseModel):
    """Parameters for an audio-to-audio generation request.

    Declares the same fields as the text-to-audio model plus ``strength``.
    """

    prompt: str = Field(..., max_length=10000, description="Audio generation prompt")
    duration: Optional[float] = Field(
        default=190, ge=1, le=190, description="Duration in seconds"
    )
    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967294, description="Random seed"
    )
    steps: Optional[int] = Field(
        default=None, description="Sampling steps (model-dependent)"
    )
    cfg_scale: Optional[float] = Field(
        default=None, ge=1, le=25, description="CFG scale"
    )
    model: Optional[AudioModel] = Field(
        default=AudioModel.STABLE_AUDIO_2, description="Audio model"
    )
    output_format: Optional[AudioOutputFormat] = Field(
        default=AudioOutputFormat.MP3, description="Output format"
    )
    strength: Optional[float] = Field(
        default=1, ge=0, le=1, description="Audio influence strength"
    )
class AudioInpaintRequest(BaseModel):
    """Parameters for an audio inpainting request.

    ``steps`` is bounded to 4-8 here. The mask bounds are validated
    independently (each 0-190); this model does not check that
    ``mask_start`` precedes ``mask_end``.
    """

    prompt: str = Field(..., max_length=10000, description="Audio generation prompt")
    duration: Optional[float] = Field(
        default=190, ge=1, le=190, description="Duration in seconds"
    )
    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967294, description="Random seed"
    )
    steps: Optional[int] = Field(default=8, ge=4, le=8, description="Sampling steps")
    output_format: Optional[AudioOutputFormat] = Field(
        default=AudioOutputFormat.MP3, description="Output format"
    )
    mask_start: Optional[float] = Field(
        default=30, ge=0, le=190, description="Mask start time"
    )
    mask_end: Optional[float] = Field(
        default=190, ge=0, le=190, description="Mask end time"
    )
# ==================== RESPONSE MODELS ====================
class GenerationResponse(BaseModel):
    """Response to a generation request: carries only the generation ID."""

    id: str = Field(
        ...,
        description="Generation ID for async operations",
    )
class ImageGenerationResponse(BaseModel):
    """Response to a direct image generation; every field is optional."""

    image: Optional[str] = Field(default=None, description="Base64 encoded image")
    seed: Optional[int] = Field(
        default=None, description="Seed used for generation"
    )
    finish_reason: Optional[FinishReason] = Field(
        default=None, description="Generation finish reason"
    )
class AudioGenerationResponse(BaseModel):
    """Response to an audio generation; every field is optional."""

    audio: Optional[str] = Field(default=None, description="Base64 encoded audio")
    seed: Optional[int] = Field(
        default=None, description="Seed used for generation"
    )
    finish_reason: Optional[FinishReason] = Field(
        default=None, description="Generation finish reason"
    )
class GenerationStatusResponse(BaseModel):
    """Status response for a generation.

    ``status`` only admits the literal value "in-progress".
    """

    id: str = Field(..., description="Generation ID")
    status: Literal["in-progress"] = Field(
        ...,
        description="Generation status",
    )
class ErrorResponse(BaseModel):
    """Error payload: an ID, an error name and a list of messages, all required."""

    id: str = Field(..., description="Error ID")
    name: str = Field(..., description="Error name")
    errors: List[str] = Field(
        ...,
        description="Error messages",
    )
# ==================== LEGACY V1 MODELS ====================
class TextPrompt(BaseModel):
    """A single weighted text prompt for the V1 API models.

    ``text`` is capped at 2000 characters; ``weight`` defaults to 1.0
    and is unbounded in this model.
    """

    text: str = Field(..., max_length=2000, description="Prompt text")
    weight: Optional[float] = Field(
        default=1.0,
        description="Prompt weight",
    )
class V1TextToImageRequest(BaseModel):
    """Parameters for a V1 text-to-image request.

    At least one text prompt is required. ``height`` and ``width`` must be
    at least 128 and a multiple of 64; the multiple-of-64 rule mirrors the
    v1 API so that invalid sizes are rejected during local validation
    instead of by the remote service.
    """

    text_prompts: List[TextPrompt] = Field(
        ..., min_items=1, description="Text prompts"
    )
    # multiple_of=64: the v1 API only accepts dimensions in increments of 64.
    height: Optional[int] = Field(
        default=512, ge=128, multiple_of=64, description="Image height"
    )
    width: Optional[int] = Field(
        default=512, ge=128, multiple_of=64, description="Image width"
    )
    cfg_scale: Optional[float] = Field(
        default=7, ge=0, le=35, description="CFG scale"
    )
    samples: Optional[int] = Field(
        default=1, ge=1, le=10, description="Number of samples"
    )
    steps: Optional[int] = Field(
        default=30, ge=10, le=50, description="Diffusion steps"
    )
    # Note: the V1 seed upper bound (4294967295) is one higher than the
    # bound used by the non-V1 request models in this file.
    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967295, description="Random seed"
    )
class V1ImageToImageRequest(BaseModel):
    """Parameters for a V1 image-to-image request.

    At least one text prompt is required; ``init_image_mode`` is a free-form
    string defaulting to "IMAGE_STRENGTH".
    """

    text_prompts: List[TextPrompt] = Field(
        ..., min_items=1, description="Text prompts"
    )
    image_strength: Optional[float] = Field(
        default=0.35, ge=0, le=1, description="Image strength"
    )
    init_image_mode: Optional[str] = Field(
        default="IMAGE_STRENGTH", description="Init image mode"
    )
    cfg_scale: Optional[float] = Field(
        default=7, ge=0, le=35, description="CFG scale"
    )
    samples: Optional[int] = Field(
        default=1, ge=1, le=10, description="Number of samples"
    )
    steps: Optional[int] = Field(
        default=30, ge=10, le=50, description="Diffusion steps"
    )
    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967295, description="Random seed"
    )
class V1MaskingRequest(BaseModel):
    """Parameters for a V1 masking request.

    ``text_prompts`` (at least one) and ``mask_source`` are mandatory.
    """

    text_prompts: List[TextPrompt] = Field(
        ..., min_items=1, description="Text prompts"
    )
    mask_source: str = Field(..., description="Mask source")
    cfg_scale: Optional[float] = Field(
        default=7, ge=0, le=35, description="CFG scale"
    )
    samples: Optional[int] = Field(
        default=1, ge=1, le=10, description="Number of samples"
    )
    steps: Optional[int] = Field(
        default=30, ge=10, le=50, description="Diffusion steps"
    )
    seed: Optional[int] = Field(
        default=0, ge=0, le=4294967295, description="Random seed"
    )
class V1GenerationArtifact(BaseModel):
    """One artifact in a V1 generation response; all three fields are required."""

    base64: str = Field(..., description="Base64 encoded image")
    seed: int = Field(..., description="Generation seed")
    # camelCase is kept as-is: the field name is part of the model's interface.
    finishReason: str = Field(..., description="Finish reason")
class V1GenerationResponse(BaseModel):
    """V1 generation response wrapping a required list of artifacts."""

    artifacts: List[V1GenerationArtifact] = Field(
        ...,
        description="Generated artifacts",
    )
# ==================== USER & ACCOUNT MODELS ====================
class OrganizationMembership(BaseModel):
    """A user's membership in one organization; all fields are required."""

    id: str = Field(..., description="Organization ID")
    name: str = Field(..., description="Organization name")
    role: str = Field(..., description="User role")
    is_default: bool = Field(
        ...,
        description="Is default organization",
    )
class AccountResponse(BaseModel):
    """Account details: user identity plus organization memberships.

    All four fields are declared as required in this model.
    """

    id: str = Field(..., description="User ID")
    email: str = Field(..., description="User email")
    profile_picture: str = Field(..., description="Profile picture URL")
    organizations: List[OrganizationMembership] = Field(
        ...,
        description="Organizations",
    )
class BalanceResponse(BaseModel):
    """Balance payload holding a single required float, ``credits``."""

    credits: float = Field(
        ...,
        description="Credit balance",
    )
class Engine(BaseModel):
    """Description of one engine; all four string fields are required."""

    id: str = Field(..., description="Engine ID")
    name: str = Field(..., description="Engine name")
    description: str = Field(
        ...,
        description="Engine description",
    )
    type: str = Field(..., description="Engine type")
class ListEnginesResponse(BaseModel):
    """Engine listing payload wrapping a required list of engines."""

    engines: List[Engine] = Field(
        ...,
        description="Available engines",
    )
# ==================== MULTIPART FORM MODELS ====================
class MultipartImageRequest(BaseModel):
    """Multipart payload carrying one required image as raw bytes."""

    image: bytes = Field(
        ...,
        description="Image file binary data",
    )
class MultipartAudioRequest(BaseModel):
    """Multipart payload carrying one required audio file as raw bytes."""

    audio: bytes = Field(
        ...,
        description="Audio file binary data",
    )
class MultipartMaskRequest(BaseModel):
    """Multipart payload with a required image and an optional mask."""

    image: bytes = Field(..., description="Image file binary data")
    mask: Optional[bytes] = Field(
        default=None, description="Mask file binary data"
    )
class MultipartStyleTransferRequest(BaseModel):
    """Multipart payload for style transfer: two required images as raw bytes."""

    init_image: bytes = Field(
        ...,
        description="Initial image binary data",
    )
    style_image: bytes = Field(
        ...,
        description="Style image binary data",
    )
class MultipartReplaceBackgroundRequest(BaseModel):
    """Multipart payload for background replacement.

    The subject image is required; the two reference images are optional.
    """

    subject_image: bytes = Field(..., description="Subject image binary data")
    background_reference: Optional[bytes] = Field(
        default=None, description="Background reference image"
    )
    light_reference: Optional[bytes] = Field(
        default=None, description="Light reference image"
    )