feat: Improve image generation prompts with visual data extraction

- Add dedicated image_generation module with statistical extraction
- Support 16 industry domains with visual concept detection
- Add model-specific guidance for Ideogram, FLUX, GLM, Qwen, MAI
- Extract statistics, rankings, comparisons, and trends automatically
- Refactor backend/api/images.py to use new module
This commit is contained in:
ajaysi
2026-03-29 10:16:40 +05:30
parent f503a24b3b
commit d6ad903e3d
5 changed files with 983 additions and 80 deletions

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import base64
import os
import uuid
from typing import Optional, Dict, Any
from typing import Optional, Dict, Any, List
from datetime import datetime
from pathlib import Path
from sqlalchemy.orm import Session
@@ -15,6 +15,11 @@ from pydantic import BaseModel, Field
from services.llm_providers.main_image_generation import generate_image
from services.llm_providers.main_image_editing import edit_image
from services.llm_providers.main_text_generation import llm_text_gen
from services.image_generation import (
extract_visual_data as _extract_visual_data,
get_model_recommendation,
build_visual_summary,
)
from utils.logger_utils import get_service_logger
from middleware.auth_middleware import get_current_user
from services.database import get_db
@@ -291,8 +296,8 @@ class PromptSuggestion(BaseModel):
class ImagePromptSuggestRequest(BaseModel):
provider: Optional[str] = Field(None, pattern="^(gemini|huggingface|stability|wavespeed)$")
model: Optional[str] = None # Specific model (e.g., "qwen-image", "ideogram-v3-turbo")
image_type: Optional[str] = Field(None, pattern="^(realistic|chart|conceptual|diagram|illustration|background)$")
model: Optional[str] = None # Specific model (e.g., "qwen-image", "ideogram-v3-turbo", "flux-2-flex", "glm-image")
image_type: Optional[str] = Field(None, pattern="^(realistic|chart|conceptual|diagram|illustration|background|infographic)$")
title: Optional[str] = None
section: Optional[Dict[str, Any]] = None
research: Optional[Dict[str, Any]] = None
@@ -459,6 +464,150 @@ MODEL_SPECIFIC_GUIDANCE = {
"High contrast areas for text placement"
]
}
},
"flux-2-flex": {
"text_overlay": {
"guidance": "FLUX 2 Flex excels at typography control and text rendering. Excellent for posters, memes, and designs requiring precise text placement.",
"best_practices": [
"Best for images requiring clear, readable text with precise placement",
"Superior typography control compared to other models",
"Can handle various text styles and sizes",
"Ideal for poster-style blog images with embedded headlines",
"Great for quote images and text-heavy designs"
],
"negative_prompt_additions": "blurry text, distorted letters, low quality typography"
},
"realistic": {
"guidance": "Photorealistic generation with excellent typography integration. Text appears naturally within scenes.",
"best_practices": [
"Include typography as a natural part of the scene",
"Specify text style, size, and placement clearly",
"Use for realistic scenes with signage, labels, or text elements",
"Professional quality with consistent text rendering"
]
},
"chart": {
"guidance": "Can render charts with text labels. Use simple chart designs with clear typography.",
"best_practices": [
"Simple bar charts, pie charts, or line graphs",
"Clear typography for labels and legends",
"Clean data visualization design",
"Avoid overly complex infographic layouts"
]
},
"infographic": {
"guidance": "Excellent for infographic-style images with clear sections and typography. Multi-panel layouts work well.",
"best_practices": [
"Use for multi-section infographics with distinct areas",
"Clear typography placement in designated zones",
"Clean, organized layout with visual hierarchy",
"Professional infographic design with text integration"
]
},
"conceptual": {
"guidance": "Conceptual imagery with typography support. Text can be integrated naturally into abstract designs.",
"best_practices": [
"Integrate text into conceptual designs as a visual element",
"Use typography to enhance conceptual messaging",
"Clear, readable text in abstract compositions"
]
}
},
"glm-image": {
"text_overlay": {
"guidance": "GLM-Image excels at infographics, educational diagrams, and professional poster designs. Strong text rendering capabilities.",
"best_practices": [
"Best for educational content, infographics, and diagrams",
"Excellent for multi-panel layouts and structured designs",
"Good text rendering with clear typography",
"Professional infographic aesthetics",
"Strong for academic or professional blog images"
],
"negative_prompt_additions": "watermarks, distorted text, low quality diagrams"
},
"realistic": {
"guidance": "Photorealistic generation with good quality. Professional presentation style.",
"best_practices": [
"Include professional lighting and composition",
"Use for polished, professional imagery",
"Quality descriptors improve output consistency"
]
},
"chart": {
"guidance": "Excellent for data visualizations. Can render charts with clear labels and professional styling.",
"best_practices": [
"Professional chart designs with clear typography",
"Data visualizations with embedded labels",
"Clean infographic-style charts",
"Good for statistical blog content"
]
},
"infographic": {
"guidance": "Best model choice for complex infographics. Multi-section layouts with clear visual hierarchy.",
"best_practices": [
"Use for comprehensive infographics with multiple data points",
"Clear section boundaries and visual hierarchy",
"Professional infographic aesthetic",
"Excellent for educational or how-to content",
"Multi-panel designs with distinct information areas"
]
},
"diagram": {
"guidance": "Excellent for technical diagrams and process illustrations. Clear visual representation of complex information.",
"best_practices": [
"Use for process flows, architectural diagrams, technical illustrations",
"Clear visual hierarchy and labeling",
"Professional diagram aesthetics",
"Educational content visualization"
]
},
"conceptual": {
"guidance": "Professional conceptual imagery. Good for abstract representations with clear messaging.",
"best_practices": [
"Clear visual metaphors for abstract concepts",
"Professional presentation style",
"Good for educational or explanatory content"
]
}
},
# Default guidance for unknown models
"_default": {
"text_overlay": {
"guidance": "Design for text overlay areas. Create clean backgrounds with high-contrast safe zones for text placement.",
"best_practices": [
"Use designated text areas (top 20% or bottom 20%)",
"Create clean, uncluttered backgrounds",
"Avoid embedding text directly in the image",
"Design for text to be added as overlay"
],
"negative_prompt_additions": "text artifacts, unreadable text, embedded words"
},
"conceptual": {
"guidance": "Focus on visual metaphors and abstract representations of the topic.",
"best_practices": [
"Use visual metaphors relevant to the content",
"Create simple, clear compositions",
"Avoid busy or cluttered designs"
]
},
"chart": {
"guidance": "Use abstract data representations. Avoid actual charts with embedded text.",
"best_practices": [
"Create visual metaphors for data",
"Use shapes, colors, and patterns to represent information",
"Design with text overlay zones for labels"
],
"warnings": ["Do not request actual charts with text - use abstract representations"]
},
"infographic": {
"guidance": "Create multi-section infographic layouts with clear visual hierarchy. Use text overlay zones for information.",
"best_practices": [
"Multi-panel designs with distinct sections",
"Clear visual hierarchy and organization",
"Design with text overlay zones for each section",
"Professional infographic aesthetic"
]
}
}
}
@@ -471,8 +620,8 @@ def get_model_specific_guidance(model: Optional[str], image_type: Optional[str])
model_lower = model.lower()
image_type_lower = (image_type or "conceptual").lower()
# Get model guidance
model_guidance = MODEL_SPECIFIC_GUIDANCE.get(model_lower, {})
# Get model guidance (use _default for unknown models)
model_guidance = MODEL_SPECIFIC_GUIDANCE.get(model_lower, MODEL_SPECIFIC_GUIDANCE.get("_default", {}))
# Get image type specific guidance
type_guidance = model_guidance.get(image_type_lower, model_guidance.get("text_overlay", {}))
@@ -480,63 +629,6 @@ def get_model_specific_guidance(model: Optional[str], image_type: Optional[str])
return type_guidance
def extract_visual_data(section: Dict[str, Any], research: Optional[Dict[str, Any]]) -> Dict[str, Any]:
"""Intelligently extract visual-relevant data from section and research."""
visual_data = {
"visual_keywords": [],
"data_points": [],
"concepts": [],
"statistics": []
}
# Extract from section
if section:
# Key points that are visualizable
key_points = section.get("key_points", []) or []
for point in key_points[:5]:
if isinstance(point, str):
# Look for numbers, percentages, comparisons
if any(char.isdigit() for char in point):
visual_data["statistics"].append(point)
# Look for visual concepts
elif any(word in point.lower() for word in ["increase", "decrease", "growth", "trend", "pattern", "comparison"]):
visual_data["data_points"].append(point)
else:
visual_data["concepts"].append(point)
# Subheadings that suggest visuals
subheadings = section.get("subheadings", []) or []
for subhead in subheadings[:3]:
if isinstance(subhead, str):
visual_data["concepts"].append(subhead)
# Keywords
keywords = section.get("keywords", []) or []
visual_data["visual_keywords"].extend([str(k) for k in keywords[:8] if k])
# Extract from research
if research:
# Key facts that are visualizable
key_facts = research.get("key_facts", []) or research.get("highlights", []) or []
for fact in key_facts[:3]:
if isinstance(fact, str):
if any(char.isdigit() for char in fact):
visual_data["statistics"].append(fact)
else:
visual_data["data_points"].append(fact)
# Research insights
insights = research.get("insights", []) or research.get("summary", "")
if isinstance(insights, str) and insights:
# Extract key phrases
sentences = insights.split('.')[:3]
visual_data["concepts"].extend([s.strip() for s in sentences if s.strip()])
elif isinstance(insights, list):
visual_data["concepts"].extend([str(i) for i in insights[:3]])
return visual_data
@router.post("/suggest-prompts", response_model=ImagePromptSuggestResponse)
def suggest_prompts(
req: ImagePromptSuggestRequest,
@@ -564,8 +656,18 @@ def suggest_prompts(
industry = persona.get("industry", req.research.get("domain") if req.research else "your industry")
tone = persona.get("tone", "professional, trustworthy")
# Extract visual-relevant data intelligently
visual_data = extract_visual_data(section, req.research)
# Extract visual-relevant data intelligently using the new module
visual_data = _extract_visual_data(section, req.research)
# Get model recommendation based on content type
model_recommendation = get_model_recommendation(visual_data)
# Build visual summary from extracted data
visual_summary = build_visual_summary(visual_data)
# Add model recommendation to visual summary if available
if model_recommendation:
visual_summary += model_recommendation
schema = {
"type": "object",
@@ -620,19 +722,6 @@ def suggest_prompts(
if model_warnings:
provider_guidance += f"\n⚠️ WARNINGS:\n" + "\n".join([f"- {w}" for w in model_warnings])
# Build visual data summary from extracted data
visual_summary_parts = []
if visual_data["statistics"]:
visual_summary_parts.append(f"Key Statistics: {', '.join(visual_data['statistics'][:3])}")
if visual_data["data_points"]:
visual_summary_parts.append(f"Data Points: {', '.join(visual_data['data_points'][:3])}")
if visual_data["concepts"]:
visual_summary_parts.append(f"Visual Concepts: {', '.join(visual_data['concepts'][:5])}")
if visual_data["visual_keywords"]:
visual_summary_parts.append(f"Keywords: {', '.join(visual_data['visual_keywords'][:8])}")
visual_summary = "\n".join(visual_summary_parts) if visual_summary_parts else ""
best_practices = (
"BLOG IMAGE BEST PRACTICES: Create images optimized for blog content, not social media posters. "
"Focus on: data visualization elements (charts, graphs, infographics), clean layouts with designated text overlay areas, "
@@ -654,14 +743,15 @@ def suggest_prompts(
else "Do not include on-image text, but still design with text overlay areas in mind for blog use."
)
# Image type specific guidance
# Image type specific guidance (enhanced with infographic type)
image_type_guidance = {
"realistic": "Photorealistic style with professional photography quality. Include camera settings and lighting details.",
"chart": "⚠️ IMPORTANT: Complex infographics are too difficult for current AI models. Create simple visual representations with designated text overlay areas instead. Use abstract data visualization elements, not actual charts with embedded text.",
"conceptual": "Abstract or conceptual imagery that represents the topic visually. Clean compositions with text overlay zones.",
"diagram": "Technical diagrams with simple, clear visual elements. Design for text overlay areas, not embedded labels.",
"illustration": "Stylized illustrations that support the content. Professional, clean aesthetic suitable for blog use.",
"background": "Background images optimized for text overlays. Clean, uncluttered compositions with high-contrast text zones."
"background": "Background images optimized for text overlays. Clean, uncluttered compositions with high-contrast text zones.",
"infographic": "Multi-section infographic designs with clear visual hierarchy. Use designated areas for each data point or concept. Design with text overlay zones for information labels. Professional infographic aesthetics with clean, organized layouts."
}.get(image_type, "General blog image guidance.")
# Build comprehensive prompt with visual data and model-specific guidance

View File

@@ -0,0 +1,22 @@
"""
Image Generation Services.
This package provides services for AI-powered image generation,
including visual data extraction and prompt optimization.
"""
from .visual_data_extractor import (
extract_visual_data,
get_model_recommendation,
build_visual_summary,
ExtractedVisualData,
DOMAIN_VISUAL_CONCEPTS,
)
__all__ = [
"extract_visual_data",
"get_model_recommendation",
"build_visual_summary",
"ExtractedVisualData",
"DOMAIN_VISUAL_CONCEPTS",
]

View File

@@ -0,0 +1,570 @@
"""
Visual Data Extractor for Image Generation Prompts.
This module provides intelligent extraction of visual-relevant data from blog sections
and research data to generate contextually relevant image prompts.
Key Features:
- Statistics extraction with regex patterns
- Domain-specific visual concept detection
- Research source mining for visual data
- Deduplication and data cleaning
"""
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Set, Tuple
# Pre-compiled regex patterns for performance
_STATISTICAL_PATTERNS: List[Tuple[str, re.Pattern]] = [
('percentage', re.compile(r'\d+[\d,]*%', re.IGNORECASE)),
('currency', re.compile(r'\$[\d,]+(?:\.\d{2})?', re.IGNORECASE)),
('multiplier', re.compile(r'\d+[\d,]*x', re.IGNORECASE)),
('large_number', re.compile(r'\d+[\d,]*\s*(?:million|billion|thousand|trillion)s?', re.IGNORECASE)),
('range', re.compile(r'\d+\s*-\s*\d+%', re.IGNORECASE)),
('change_up', re.compile(r'up\s+by\s+\d+%', re.IGNORECASE)),
('change_down', re.compile(r'down\s+by\s+\d+%', re.IGNORECASE)),
('growth', re.compile(r'(?:increased|decreased|grew|declined)\s*[\d%]+', re.IGNORECASE)),
('cagr', re.compile(r'cagr\s+of\s+[\d.]+%', re.IGNORECASE)),
]
_VISUAL_DATA_PATTERNS: List[Tuple[str, re.Pattern]] = [
('times', re.compile(r'\d+\s*(?:times|folds?)', re.IGNORECASE)),
('ranking', re.compile(r'rank(?:ed|ing)?\s*(?:#?\d+|first|second|third|top|bottom)', re.IGNORECASE)),
('comparison', re.compile(r'(?:vs|versus|compared\s+to|compared\s+with)', re.IGNORECASE)),
('chart_mention', re.compile(r'(?:chart|graph|diagram|visual|infographic)', re.IGNORECASE)),
('superlative', re.compile(r'(?:best|worst|leading|top|highest|lowest)', re.IGNORECASE)),
]
_TREND_KEYWORDS: Set[str] = {
'increase', 'decrease', 'growth', 'trend', 'pattern', 'comparison',
'ranking', 'versus', 'vs', 'rise', 'fall', 'growth', 'decline',
'surge', 'drop', 'climb', 'jump', 'plummet', 'soar', 'fluctuate'
}
# Domain-specific visual concepts mapping.
#
# Keys are domain identifiers; values are candidate visual concepts for that
# domain. List order is significant: _detect_domains_in_text uses the domain
# key plus the FIRST FIVE concepts as detection keywords, and contributes the
# FIRST THREE concepts to the extracted result when the domain is detected.
# NOTE(review): this mapping currently defines 17 domains, while the commit
# message and docs mention 16 - confirm which is intended.
DOMAIN_VISUAL_CONCEPTS: Dict[str, List[str]] = {
    "tech": [
        "circuit board patterns", "digital interface", "data stream", "network nodes",
        "server racks", "silicon chips", "binary code", "cloud computing",
        "artificial intelligence", "machine learning model", "software code",
        "technology innovation", "digital transformation"
    ],
    "healthcare": [
        "stethoscope", "medical chart", "hospital equipment", "DNA helix",
        "heart rate monitor", "medical cross", "prescription", "patient care",
        "healthcare professional", "medical research", "wellness", "health metrics"
    ],
    "finance": [
        "stock chart", "dollar signs", "investment growth", "banking",
        "pie chart", "financial graph", "portfolio", "market trends",
        "cryptocurrency", "blockchain", "financial analysis", "wealth management"
    ],
    "marketing": [
        "digital marketing", "social media", "content strategy", "audience growth",
        "brand awareness", "conversion funnel", "engagement metrics", "ROI chart",
        "marketing analytics", "customer acquisition", "viral content"
    ],
    "education": [
        "classroom", "graduation cap", "books", "learning curve",
        "knowledge growth", "student achievement", "online learning", "curriculum",
        "educational technology", "academic success", "skill development"
    ],
    "ecommerce": [
        "shopping cart", "product display", "checkout flow", "conversion",
        "customer journey", "inventory", "shipping", "discount tags",
        "online store", "e-commerce analytics", "retail technology"
    ],
    # The key uses an underscore; free text normally says "real estate".
    "real_estate": [
        "building", "house", "property", "real estate market",
        "mortgage", "home ownership", "apartment complex", "construction",
        "property investment", "housing market", "architecture"
    ],
    "food": [
        "restaurant", "cooking", "ingredients", "food preparation",
        "recipe", "menu", "dining experience", "culinary arts",
        "gourmet", "food photography", "healthy eating"
    ],
    "travel": [
        "airplane", "destination", "map", "luggage", "passport",
        "tourist", "hotel", "beach resort", "adventure", "travel planning",
        "vacation", "world exploration"
    ],
    "fitness": [
        "gym", "workout", "exercise", "muscle", "weight loss",
        "nutrition", "running", "yoga", "healthy lifestyle", "fitness tracking",
        "sports training", "wellness"
    ],
    "fashion": [
        "clothing", "wardrobe", "style", "runway", "designer",
        "outfit", "accessories", "fashion trends", "personal style", "apparel"
    ],
    "entertainment": [
        "movie reel", "music note", "concert", "celebrity", "streaming",
        "gaming", "content creation", "media production", "creative arts", "performance"
    ],
    "business": [
        "office", "meeting", "presentation", "business growth", "strategy",
        "team collaboration", "enterprise", "corporate", "leadership", "productivity"
    ],
    "science": [
        "laboratory", "microscope", "experiment", "data analysis", "research",
        "scientific method", "discovery", "innovation", "technology development"
    ],
    "sports": [
        "stadium", "athlete", "scoreboard", "trophy", "team",
        "competition", "fitness", "championship", "sports analytics", "training"
    ],
    "legal": [
        "gavel", "courthouse", "legal documents", "scales of justice",
        "law books", "legal contract", "attorney", "courtroom", "compliance"
    ],
    "environmental": [
        "renewable energy", "solar panels", "wind turbines", "green technology",
        "sustainability", "climate change", "eco-friendly", "nature conservation"
    ],
}
@dataclass
class ExtractedVisualData:
    """Bundle of everything the extractor pulls from a section and its research.

    Every field is a list of strings that starts out empty, so callers can
    append to any of them without initialising anything first.
    """

    # Plain keywords taken from the section.
    visual_keywords: List[str] = field(default_factory=list)
    # Sentences describing trends, comparisons or other chartable facts.
    data_points: List[str] = field(default_factory=list)
    # Headings and other free-form ideas.
    concepts: List[str] = field(default_factory=list)
    # Text snippets that contain a recognised statistic.
    statistics: List[str] = field(default_factory=list)
    # Visual concepts contributed by detected domains.
    domain_concepts: List[str] = field(default_factory=list)
    visual_metaphors: List[str] = field(default_factory=list)
    # Keys of the domains that were detected.
    detected_domains: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, List[str]]:
        """Return the fields as a dict keyed by field name (lists are shared, not copied)."""
        field_names = (
            "visual_keywords",
            "data_points",
            "concepts",
            "statistics",
            "domain_concepts",
            "visual_metaphors",
            "detected_domains",
        )
        return {name: getattr(self, name) for name in field_names}

    def has_statistics(self) -> bool:
        """Tell whether at least one statistic was collected."""
        return True if self.statistics else False

    def has_data_points(self) -> bool:
        """Tell whether at least one data point was collected."""
        return True if self.data_points else False

    def has_domain_concepts(self) -> bool:
        """Tell whether at least one domain concept was collected."""
        return True if self.domain_concepts else False

    def is_data_heavy(self) -> bool:
        """Tell whether the content carries statistics or data points."""
        if self.has_statistics():
            return True
        return self.has_data_points()

    def get_recommended_image_type(self) -> str:
        """Suggest an image type: data-heavy content gets "infographic" (when
        domain concepts exist) or "chart"; everything else gets "conceptual"."""
        if not self.is_data_heavy():
            return "conceptual"
        if self.has_domain_concepts():
            return "infographic"
        return "chart"
def _extract_statistic_with_context(text: str) -> Optional[str]:
    """
    Extract a statistic with surrounding context from text.

    The patterns in _STATISTICAL_PATTERNS are tried in list order and the first
    one that matches anchors the context window.

    Args:
        text: Input text to search

    Returns:
        The matched statistic with up to 60 characters before and 30 characters
        after it, snapped to word boundaries where the window would otherwise
        cut a word in half, or None if no statistic is found.
    """
    for _label, pattern in _STATISTICAL_PATTERNS:
        match = pattern.search(text)
        if not match:
            continue

        start = max(0, match.start() - 60)
        end = min(len(text), match.end() + 30)

        # Only drop leading characters when the window really begins mid-word;
        # a window that already starts on a boundary keeps its first word.
        if start > 0 and not text[start - 1].isspace() and not text[start].isspace():
            gap = text.find(' ', start, match.start())
            # Same 20-character cap as before: never discard a long run.
            if gap != -1 and gap - start < 20:
                start = gap + 1

        # Likewise drop a trailing word fragment, never touching the match itself.
        if end < len(text) and not text[end - 1].isspace() and not text[end].isspace():
            gap = text.rfind(' ', match.end(), end)
            if gap != -1:
                end = gap

        return text[start:end].strip()
    return None
def _has_visual_mention(text: str) -> bool:
    """
    Report whether the text mentions anything visual or comparable.

    Args:
        text: Input text to check

    Returns:
        True when at least one entry of _VISUAL_DATA_PATTERNS matches, else False
    """
    return any(regex.search(text) for _label, regex in _VISUAL_DATA_PATTERNS)
def _has_trend_keyword(text: str) -> bool:
    """
    Check if text contains trend/comparison keywords.

    A keyword counts when a word in the text *starts with* it, so inflections
    such as "increases" or "dropped" still match. Plain substring matching used
    to flag unrelated words ("enterprise" contains "rise", "waterfall" contains
    "fall", "backdrop" contains "drop").

    Args:
        text: Input text to check

    Returns:
        True if any word in the text begins with a keyword from _TREND_KEYWORDS
    """
    prefixes = tuple(_TREND_KEYWORDS)
    # Tokenise on letters only so punctuation ("vs.", "growth,") does not matter.
    words = re.findall(r"[a-z]+", text.lower())
    return any(word.startswith(prefixes) for word in words)
def _detect_domains_in_text(text: str) -> Tuple[List[str], List[str]]:
    """
    Detect industry/domain from text and return relevant visual concepts.

    A domain is detected when the lowercased text contains the domain key (with
    or without underscores replaced by spaces) or any of the domain's first five
    concepts. Each detected domain contributes its first three concepts.

    Args:
        text: Input text to analyze

    Returns:
        Tuple of (detected_domain_names, domain_concepts), both in the order the
        domains appear in DOMAIN_VISUAL_CONCEPTS; concepts are de-duplicated.
    """
    text_lower = text.lower()
    detected_domains: List[str] = []
    all_concepts: List[str] = []

    for domain, concepts in DOMAIN_VISUAL_CONCEPTS.items():
        # "real_estate" never appears in prose, so also try "real estate".
        keywords_to_check = [domain, domain.replace("_", " ")] + concepts[:5]
        # Keywords are lowercased because the text is: "DNA helix" could never
        # match otherwise.
        if any(keyword.lower() in text_lower for keyword in keywords_to_check):
            detected_domains.append(domain)
            # Add top 3 concepts for this domain
            all_concepts.extend(concepts[:3])

    # dict.fromkeys de-duplicates while keeping first-seen order; set() would
    # return the concepts in an order that changes between interpreter runs.
    return detected_domains, list(dict.fromkeys(all_concepts))
def _deduplicate_and_limit(
items: List[str],
max_items: int = 10,
key_length: int = 50
) -> List[str]:
"""
Deduplicate items by normalized key and limit count.
Args:
items: List of strings to deduplicate
max_items: Maximum number of items to return
key_length: Length of normalized key for comparison
Returns:
Deduplicated list with max_items items
"""
seen: Set[str] = set()
unique_items: List[str] = []
for item in items:
if not item or not isinstance(item, str):
continue
normalized = item.lower().strip()[:key_length]
if normalized and normalized not in seen and len(unique_items) < max_items:
seen.add(normalized)
unique_items.append(item.strip())
return unique_items
def extract_visual_data(
    section: Optional[Dict[str, Any]],
    research: Optional[Dict[str, Any]]
) -> ExtractedVisualData:
    """
    Intelligently extract visual-relevant data from blog section and research.

    This function analyzes section headings, key points, subheadings, keywords,
    and research data to extract statistics, data points, visual concepts,
    and domain-specific visual concepts.

    Args:
        section: Blog section dictionary with optional keys:
            - heading: Section title
            - subheadings: List of subheading strings
            - key_points: List of key point strings
            - keywords: List of keyword strings
        research: Research data dictionary with optional keys:
            - key_facts, highlights: List of fact strings
            - insights, summary: String or list of insight strings
            - sources, references: List of source dictionaries
            - keywords: Dict or list of keywords
            - domain, industry: Domain/industry string

    Returns:
        ExtractedVisualData dataclass with extracted, de-duplicated information.
        Empty or missing inputs yield an instance whose lists are all empty.

    Example:
        >>> section = {
        ...     "heading": "AI in Healthcare",
        ...     "key_points": ["Market grew 40% in 2023", "Investment reached $5B"]
        ... }
        >>> result = extract_visual_data(section, None)
        >>> result.statistics
        ['Market grew 40% in 2023', 'Investment reached $5B']
        >>> sorted(result.domain_concepts)
        ['hospital equipment', 'medical chart', 'stethoscope']
    """
    result = ExtractedVisualData()

    # Phase 1: Extract from section
    if section:
        _extract_from_section(section, result)

    # Phase 2: Extract from research
    if research:
        _extract_from_research(research, result)

    # Phase 3: Deduplicate all extracted data
    _deduplicate_results(result)

    return result
def _as_item_list(value: object) -> list:
    """Return *value* as a list: lists/tuples are copied, a lone non-empty string is wrapped, anything else gives []."""
    if isinstance(value, str):
        return [value] if value else []
    if isinstance(value, (list, tuple)):
        return list(value)
    return []


def _extract_from_section(section: Dict, result: ExtractedVisualData) -> None:
    """Extract visual data from blog section.

    Reads the optional ``key_points``, ``subheadings``, ``keywords`` and
    ``heading`` entries of *section* and appends what it finds to *result*
    in place. Entries of an unexpected type are ignored rather than raising,
    because the section comes straight from an API request body.

    Args:
        section: Blog section dictionary
        result: Accumulator that is mutated in place
    """

    def note_domains(text: str) -> None:
        # Record every domain (and its concepts) that the text hints at.
        domains, concepts = _detect_domains_in_text(text)
        result.detected_domains.extend(domains)
        result.domain_concepts.extend(concepts)

    # Key points: classify each as statistic, data point or plain concept.
    # Domain detection runs for every key point, whichever bucket it lands in.
    for point in _as_item_list(section.get("key_points"))[:10]:
        if not isinstance(point, str):
            continue
        note_domains(point)
        stat = _extract_statistic_with_context(point)
        if stat:
            result.statistics.append(stat)
        elif _has_visual_mention(point) or _has_trend_keyword(point):
            result.data_points.append(point)
        else:
            result.concepts.append(point)

    # Subheadings are always treated as concepts.
    for subhead in _as_item_list(section.get("subheadings"))[:7]:
        if isinstance(subhead, str):
            result.concepts.append(subhead)
            note_domains(subhead)

    # Keywords are passed through unchanged.
    for kw in _as_item_list(section.get("keywords"))[:12]:
        if kw and isinstance(kw, str):
            result.visual_keywords.append(kw)

    # The heading drives domain detection and becomes the leading concept.
    heading = section.get("heading", "")
    if heading and isinstance(heading, str):
        note_domains(heading)
        if heading.strip():
            result.concepts.insert(0, heading.strip())
def _research_list(value: object) -> list:
    """Return *value* as a list: lists/tuples are copied, a lone non-empty string is wrapped, anything else gives []."""
    if isinstance(value, str):
        return [value] if value else []
    if isinstance(value, (list, tuple)):
        return list(value)
    return []


def _extract_from_research(research: Dict, result: ExtractedVisualData) -> None:
    """Extract visual data from research data.

    Reads the optional ``key_facts``/``highlights``, ``insights``/``summary``,
    ``sources``/``references``, ``keywords`` and ``domain``/``industry`` entries
    of *research* and appends what it finds to *result* in place. Values of an
    unexpected type are skipped instead of raising, because the dictionary is
    taken from an API request body without schema validation.

    Args:
        research: Research data dictionary
        result: Accumulator that is mutated in place
    """

    def note_domains(text: object) -> None:
        # Record detected domains; silently ignore anything that is not text.
        if isinstance(text, str) and text:
            domains, concepts = _detect_domains_in_text(text)
            result.detected_domains.extend(domains)
            result.domain_concepts.extend(concepts)

    def file_statement(text: str, fallback: List[str]) -> None:
        # Statistic-bearing text goes to statistics, everything else to *fallback*.
        stat = _extract_statistic_with_context(text)
        if stat:
            result.statistics.append(stat)
        else:
            fallback.append(text)

    # Key facts / highlights
    key_facts = research.get("key_facts") or research.get("highlights") or []
    for fact in _research_list(key_facts)[:7]:
        if isinstance(fact, str):
            file_statement(fact, result.data_points)

    # Insights / summary
    insights = research.get("insights") or research.get("summary") or ""
    if isinstance(insights, str) and insights:
        # Split only where sentence punctuation is followed by whitespace, so
        # decimals such as "44.9%" or "$1.5 billion" stay in one piece.
        for sent in re.split(r'(?<=[.!?])\s+', insights)[:7]:
            sent = sent.strip().rstrip('.').strip()
            if sent:
                file_statement(sent, result.concepts)
    elif isinstance(insights, list):
        for insight in insights[:7]:
            if isinstance(insight, str):
                file_statement(insight, result.concepts)

    # Research sources
    sources = research.get("sources") or research.get("references") or []
    for source in _research_list(sources)[:7]:
        if not isinstance(source, dict):
            continue

        note_domains(source.get("title", ""))

        source_excerpt = (
            source.get("excerpt", "")
            or source.get("snippet", "")
            or source.get("description", "")
        )
        if not isinstance(source_excerpt, str) or not source_excerpt:
            continue

        stat = _extract_statistic_with_context(source_excerpt)
        if stat:
            result.statistics.append(stat)
            # Keep the excerpt itself as a data point (limited to 200 chars).
            result.data_points.append(source_excerpt[:200])

        # Excerpts that talk about charts/rankings/comparisons are data points too;
        # later de-duplication collapses this with the entry above if both apply.
        if _has_visual_mention(source_excerpt):
            result.data_points.append(source_excerpt[:300])

        note_domains(source_excerpt)

    # Research keywords: either {"primary_keywords"/"primary": [...]} or a list.
    research_keywords = research.get("keywords", {})
    if isinstance(research_keywords, dict):
        keyword_items = _research_list(
            research_keywords.get("primary_keywords")
            or research_keywords.get("primary")
            or []
        )
    else:
        keyword_items = _research_list(research_keywords)
    for kw in keyword_items[:7]:
        note_domains(kw)

    # Explicit domain / industry hint
    note_domains(research.get("domain", "") or research.get("industry", ""))
def _deduplicate_results(result: ExtractedVisualData) -> None:
    """Deduplicate all extracted data in place and cap each list's length.

    Keywords are capped at 12 entries; data points, concepts, statistics and
    domain concepts at 10 each. Detected domains are de-duplicated without a cap.
    """
    result.visual_keywords = _deduplicate_and_limit(result.visual_keywords, 12)
    result.data_points = _deduplicate_and_limit(result.data_points, 10)
    result.concepts = _deduplicate_and_limit(result.concepts, 10)
    result.statistics = _deduplicate_and_limit(result.statistics, 10)
    result.domain_concepts = _deduplicate_and_limit(result.domain_concepts, 10)
    # dict.fromkeys keeps first-seen order. set() ordering of strings changes
    # between interpreter runs, which made the generated prompt text unstable.
    result.detected_domains = list(dict.fromkeys(result.detected_domains))
def get_model_recommendation(visual_data: ExtractedVisualData) -> Optional[str]:
    """
    Suggest image models that suit the extracted content.

    Data-heavy content takes precedence over domain-specific content; when
    neither applies there is nothing to recommend.

    Args:
        visual_data: ExtractedVisualData instance

    Returns:
        A prompt fragment starting with two newlines, or None
    """
    if visual_data.is_data_heavy():
        intro = "This section contains data/statistics."
        picks = [
            "- FLUX Kontext Pro: Best for data visualizations with text labels",
            "- GLM-Image: Excellent for infographics and educational diagrams",
            "- Ideogram V3 Turbo: Good for simple charts with text overlays",
        ]
    elif visual_data.has_domain_concepts():
        intro = "This section covers domain-specific content."
        picks = [
            "- Qwen Image: Best for abstract conceptual imagery",
            "- FLUX Kontext Pro: Good for conceptual imagery with text support",
            "- FLUX 2 Flex: Excellent for poster-style conceptual designs",
        ]
    else:
        return None

    return f"\n\nMODEL RECOMMENDATION: {intro} Consider using:\n" + "\n".join(picks)
def build_visual_summary(visual_data: ExtractedVisualData) -> str:
    """
    Render the extracted data as a labelled, line-per-category summary.

    Categories with no entries are left out. Every category except the
    detected domains is truncated to a fixed number of entries.

    Args:
        visual_data: ExtractedVisualData instance

    Returns:
        Newline-separated summary for use in prompts ("" when nothing was extracted)
    """
    # (label, entries, maximum shown); None means "show all".
    categories = (
        ("Key Statistics", visual_data.statistics, 3),
        ("Data Points", visual_data.data_points, 3),
        ("Visual Concepts", visual_data.concepts, 5),
        ("Keywords", visual_data.visual_keywords, 8),
        ("Domain Visual Concepts", visual_data.domain_concepts, 5),
        ("Detected Domains", visual_data.detected_domains, None),
    )
    lines: List[str] = [
        f"{label}: {', '.join(entries[:limit])}"
        for label, entries, limit in categories
        if entries
    ]
    return "\n".join(lines)

View File

@@ -0,0 +1,221 @@
# Image Generation for Blog Writer - Technical Documentation
## Overview
This document describes the improvements made to image generation for the ALwrity Blog Writer feature. Intelligent visual data extraction and content-based model selection make the generated images more relevant to the blog content.
## Architecture
### New Module Structure
```
backend/services/image_generation/
├── __init__.py # Package exports
└── visual_data_extractor.py # Core extraction logic
backend/api/images.py # Updated to use new module
```
### Key Components
1. **Visual Data Extractor** (`visual_data_extractor.py`)
- Extracts statistics, data points, visual concepts, and domain-specific imagery
- Pre-compiled regex patterns for performance
- Domain detection across 16 industry verticals
- Dataclass-based return type for type safety
2. **Model-Specific Guidance** (`images.py`)
- Extended guidance for 5 models (Ideogram V3, FLUX Kontext Pro, Qwen Image, FLUX 2 Flex, GLM-Image)
- Image type recommendations (infographic, chart, conceptual, etc.)
- Content-based model selection
## Features
### 1. Statistics Extraction
**Patterns Supported:**
- Percentages: `42%`, `1,000,000%`
- Currency: `$500`, `$1.5M`
- Multipliers: `5x`, `10x growth`
- Large numbers: `million`, `billion`, `thousand`
- Ranges: `20-30%`
- Change indicators: `up by 30%`, `down by 15%`
- CAGR: `CAGR of 44.9%`
**Example:**
```python
section = {"key_points": ["Market grew 40% in 2023", "Investment reached $5 billion"]}
result = extract_visual_data(section, None)
# result.statistics = ["Market grew 40% in 2023", "Investment reached $5 billion"]
```
### 2. Domain Detection
**Supported Domains (16):**
- Tech (AI, cloud, software, digital transformation)
- Healthcare (medical, hospital, patient care)
- Finance (investment, banking, stock market)
- Marketing (digital marketing, social media, ROI)
- Education (learning, academic, curriculum)
- E-commerce (shopping, conversion, inventory)
- Real Estate (property, mortgage, housing)
- Food (restaurant, cooking, recipe)
- Travel (destination, adventure, vacation)
- Fitness (workout, nutrition, wellness)
- Fashion (clothing, style, designer)
- Entertainment (streaming, gaming, content)
- Business (enterprise, strategy, leadership)
- Science (research, experiment, laboratory)
- Sports (competition, training, championship)
- Legal (compliance, contracts, courtroom)
- Environmental (sustainability, renewable, eco-friendly)
**Example:**
```python
section = {"heading": "AI in Healthcare Market"}
result = extract_visual_data(section, None)
# result.detected_domains = ["healthcare", "tech"]
# result.domain_concepts = ["stethoscope", "medical chart", "hospital equipment"]
```
### 3. Visual Data Patterns
**Detected Patterns:**
- Rankings: `ranked #1`, `top performer`, `leading brand`
- Comparisons: `vs`, `versus`, `compared to`
- Trends: `increase`, `decrease`, `growth`, `surge`
- Multipliers: `5 times`, `3-fold`
### 4. Model Selection Recommendations
Recommendations depend on the type of content that was extracted:
**For Data-Heavy Content (statistics/data points):**
- FLUX Kontext Pro: Best for data visualizations with text labels
- GLM-Image: Excellent for infographics and educational diagrams
- Ideogram V3 Turbo: Good for simple charts with text overlays
**For Domain-Specific Content:**
- Qwen Image: Best for abstract conceptual imagery
- FLUX Kontext Pro: Good for conceptual imagery with text support
- FLUX 2 Flex: Excellent for poster-style conceptual designs
## API Integration
### Endpoint: `POST /api/images/suggest-prompts`
**Request Body:**
```json
{
"provider": "wavespeed",
"model": "flux-kontext-pro",
"image_type": "infographic",
"title": "AI in Healthcare Market",
"section": {
"heading": "Market Growth",
"subheadings": ["Statistics", "Key Players"],
"key_points": ["Market grew 40% in 2023", "Investment reached $5B"]
},
"research": {
"domain": "healthcare",
"key_facts": ["CAGR of 44.9% projected"]
},
"persona": {
"audience": "healthcare professionals",
"tone": "professional"
}
}
```
**Response:**
```json
{
"suggestions": [
{
"prompt": "Professional infographic showing AI healthcare market growth...",
"negative_prompt": "blurry, distorted, text artifacts...",
"width": 1024,
"height": 1024,
"overlay_text": "40% Growth"
}
]
}
```
## Usage Example
```python
from services.image_generation import extract_visual_data, build_visual_summary, get_model_recommendation
# Extract visual data from blog section and research
section = {
"heading": "Digital Marketing Trends 2024",
"key_points": [
"Social media engagement up 60% YoY",
"Video content drives 3x more engagement",
"ROI increased by 45% with personalized campaigns"
],
"keywords": ["marketing", "social media", "ROI"]
}
research = {
"domain": "marketing",
"sources": [
{
"title": "Marketing Trends Report 2024",
"excerpt": "Digital ad spend reached $50 billion, up 25% from last year."
}
]
}
# Extract visual data
result = extract_visual_data(section, research)
# Access extracted data
print(f"Statistics: {result.statistics}")
print(f"Domain: {result.detected_domains}")
print(f"Concepts: {result.domain_concepts}")
# Get model recommendation
rec = get_model_recommendation(result)
print(f"Recommendation: {rec}")
# Build summary for prompt
summary = build_visual_summary(result)
```
## Testing
**Unit Tests:** `backend/tests/services/test_visual_data_extractor.py`
Run tests:
```bash
cd backend
pytest tests/services/test_visual_data_extractor.py -v
```
**Test Coverage:**
- Statistics extraction (8 tests)
- Visual mention detection (5 tests)
- Trend keyword detection (4 tests)
- Domain detection (6 tests)
- Deduplication (5 tests)
- Main extraction function (8 tests)
- Model recommendations (3 tests)
- Visual summary building (3 tests)
- Integration tests (3 tests)
## Performance Considerations
1. **Pre-compiled Regex Patterns**: All regex patterns are compiled once at module load time, not on each function call.
2. **Deduplication**: Results are deduplicated using normalized keys, and the statistics and domain-concept lists are each capped at 10 entries.
3. **Lazy Evaluation**: Only the required fields of the input data are processed.
## Future Enhancements
1. **Additional Domains**: Support for more industry verticals
2. **Custom Visual Metaphors**: Allow users to define domain-specific visual concepts
3. **A/B Testing**: Compare image relevance across different prompt strategies
4. **Feedback Loop**: Use image selection data to improve future prompt generation

View File

@@ -9,7 +9,7 @@ import InfoIcon from '@mui/icons-material/Info';
import { useImageGeneration, ImageGenerationRequest, fetchPromptSuggestions } from './useImageGeneration';
// Provider identifiers accepted by this component (used by `defaultProvider` below).
type Provider = 'huggingface' | 'stability' | 'wavespeed';
// Image categories a caller can request.
// NOTE(review): the two `ImageType` declarations below look like the removed and
// added lines of a diff hunk (the second adds 'infographic'); a real file would
// keep only one of them — confirm against the repository.
type ImageType = 'realistic' | 'chart' | 'conceptual' | 'diagram' | 'illustration' | 'background';
type ImageType = 'realistic' | 'chart' | 'conceptual' | 'diagram' | 'illustration' | 'background' | 'infographic';
interface ImageGeneratorProps {
defaultProvider?: Provider;