feat: Improve image generation prompts with visual data extraction
- Add dedicated image_generation module with statistical extraction
- Support 17 industry domains with visual concept detection
- Add model-specific guidance for Ideogram, FLUX, GLM, Qwen, MAI
- Extract statistics, rankings, comparisons, and trends automatically
- Refactor backend/api/images.py to use new module
This commit is contained in:
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import base64
|
||||
import os
|
||||
import uuid
|
||||
from typing import Optional, Dict, Any
|
||||
from typing import Optional, Dict, Any, List
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -15,6 +15,11 @@ from pydantic import BaseModel, Field
|
||||
from services.llm_providers.main_image_generation import generate_image
|
||||
from services.llm_providers.main_image_editing import edit_image
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from services.image_generation import (
|
||||
extract_visual_data as _extract_visual_data,
|
||||
get_model_recommendation,
|
||||
build_visual_summary,
|
||||
)
|
||||
from utils.logger_utils import get_service_logger
|
||||
from middleware.auth_middleware import get_current_user
|
||||
from services.database import get_db
|
||||
@@ -291,8 +296,8 @@ class PromptSuggestion(BaseModel):
|
||||
|
||||
class ImagePromptSuggestRequest(BaseModel):
|
||||
provider: Optional[str] = Field(None, pattern="^(gemini|huggingface|stability|wavespeed)$")
|
||||
model: Optional[str] = None # Specific model (e.g., "qwen-image", "ideogram-v3-turbo")
|
||||
image_type: Optional[str] = Field(None, pattern="^(realistic|chart|conceptual|diagram|illustration|background)$")
|
||||
model: Optional[str] = None # Specific model (e.g., "qwen-image", "ideogram-v3-turbo", "flux-2-flex", "glm-image")
|
||||
image_type: Optional[str] = Field(None, pattern="^(realistic|chart|conceptual|diagram|illustration|background|infographic)$")
|
||||
title: Optional[str] = None
|
||||
section: Optional[Dict[str, Any]] = None
|
||||
research: Optional[Dict[str, Any]] = None
|
||||
@@ -459,6 +464,150 @@ MODEL_SPECIFIC_GUIDANCE = {
|
||||
"High contrast areas for text placement"
|
||||
]
|
||||
}
|
||||
},
|
||||
"flux-2-flex": {
|
||||
"text_overlay": {
|
||||
"guidance": "FLUX 2 Flex excels at typography control and text rendering. Excellent for posters, memes, and designs requiring precise text placement.",
|
||||
"best_practices": [
|
||||
"Best for images requiring clear, readable text with precise placement",
|
||||
"Superior typography control compared to other models",
|
||||
"Can handle various text styles and sizes",
|
||||
"Ideal for poster-style blog images with embedded headlines",
|
||||
"Great for quote images and text-heavy designs"
|
||||
],
|
||||
"negative_prompt_additions": "blurry text, distorted letters, low quality typography"
|
||||
},
|
||||
"realistic": {
|
||||
"guidance": "Photorealistic generation with excellent typography integration. Text appears naturally within scenes.",
|
||||
"best_practices": [
|
||||
"Include typography as a natural part of the scene",
|
||||
"Specify text style, size, and placement clearly",
|
||||
"Use for realistic scenes with signage, labels, or text elements",
|
||||
"Professional quality with consistent text rendering"
|
||||
]
|
||||
},
|
||||
"chart": {
|
||||
"guidance": "Can render charts with text labels. Use simple chart designs with clear typography.",
|
||||
"best_practices": [
|
||||
"Simple bar charts, pie charts, or line graphs",
|
||||
"Clear typography for labels and legends",
|
||||
"Clean data visualization design",
|
||||
"Avoid overly complex infographic layouts"
|
||||
]
|
||||
},
|
||||
"infographic": {
|
||||
"guidance": "Excellent for infographic-style images with clear sections and typography. Multi-panel layouts work well.",
|
||||
"best_practices": [
|
||||
"Use for multi-section infographics with distinct areas",
|
||||
"Clear typography placement in designated zones",
|
||||
"Clean, organized layout with visual hierarchy",
|
||||
"Professional infographic design with text integration"
|
||||
]
|
||||
},
|
||||
"conceptual": {
|
||||
"guidance": "Conceptual imagery with typography support. Text can be integrated naturally into abstract designs.",
|
||||
"best_practices": [
|
||||
"Integrate text into conceptual designs as a visual element",
|
||||
"Use typography to enhance conceptual messaging",
|
||||
"Clear, readable text in abstract compositions"
|
||||
]
|
||||
}
|
||||
},
|
||||
"glm-image": {
|
||||
"text_overlay": {
|
||||
"guidance": "GLM-Image excels at infographics, educational diagrams, and professional poster designs. Strong text rendering capabilities.",
|
||||
"best_practices": [
|
||||
"Best for educational content, infographics, and diagrams",
|
||||
"Excellent for multi-panel layouts and structured designs",
|
||||
"Good text rendering with clear typography",
|
||||
"Professional infographic aesthetics",
|
||||
"Strong for academic or professional blog images"
|
||||
],
|
||||
"negative_prompt_additions": "watermarks, distorted text, low quality diagrams"
|
||||
},
|
||||
"realistic": {
|
||||
"guidance": "Photorealistic generation with good quality. Professional presentation style.",
|
||||
"best_practices": [
|
||||
"Include professional lighting and composition",
|
||||
"Use for polished, professional imagery",
|
||||
"Quality descriptors improve output consistency"
|
||||
]
|
||||
},
|
||||
"chart": {
|
||||
"guidance": "Excellent for data visualizations. Can render charts with clear labels and professional styling.",
|
||||
"best_practices": [
|
||||
"Professional chart designs with clear typography",
|
||||
"Data visualizations with embedded labels",
|
||||
"Clean infographic-style charts",
|
||||
"Good for statistical blog content"
|
||||
]
|
||||
},
|
||||
"infographic": {
|
||||
"guidance": "Best model choice for complex infographics. Multi-section layouts with clear visual hierarchy.",
|
||||
"best_practices": [
|
||||
"Use for comprehensive infographics with multiple data points",
|
||||
"Clear section boundaries and visual hierarchy",
|
||||
"Professional infographic aesthetic",
|
||||
"Excellent for educational or how-to content",
|
||||
"Multi-panel designs with distinct information areas"
|
||||
]
|
||||
},
|
||||
"diagram": {
|
||||
"guidance": "Excellent for technical diagrams and process illustrations. Clear visual representation of complex information.",
|
||||
"best_practices": [
|
||||
"Use for process flows, architectural diagrams, technical illustrations",
|
||||
"Clear visual hierarchy and labeling",
|
||||
"Professional diagram aesthetics",
|
||||
"Educational content visualization"
|
||||
]
|
||||
},
|
||||
"conceptual": {
|
||||
"guidance": "Professional conceptual imagery. Good for abstract representations with clear messaging.",
|
||||
"best_practices": [
|
||||
"Clear visual metaphors for abstract concepts",
|
||||
"Professional presentation style",
|
||||
"Good for educational or explanatory content"
|
||||
]
|
||||
}
|
||||
},
|
||||
# Default guidance for unknown models
|
||||
"_default": {
|
||||
"text_overlay": {
|
||||
"guidance": "Design for text overlay areas. Create clean backgrounds with high-contrast safe zones for text placement.",
|
||||
"best_practices": [
|
||||
"Use designated text areas (top 20% or bottom 20%)",
|
||||
"Create clean, uncluttered backgrounds",
|
||||
"Avoid embedding text directly in the image",
|
||||
"Design for text to be added as overlay"
|
||||
],
|
||||
"negative_prompt_additions": "text artifacts, unreadable text, embedded words"
|
||||
},
|
||||
"conceptual": {
|
||||
"guidance": "Focus on visual metaphors and abstract representations of the topic.",
|
||||
"best_practices": [
|
||||
"Use visual metaphors relevant to the content",
|
||||
"Create simple, clear compositions",
|
||||
"Avoid busy or cluttered designs"
|
||||
]
|
||||
},
|
||||
"chart": {
|
||||
"guidance": "Use abstract data representations. Avoid actual charts with embedded text.",
|
||||
"best_practices": [
|
||||
"Create visual metaphors for data",
|
||||
"Use shapes, colors, and patterns to represent information",
|
||||
"Design with text overlay zones for labels"
|
||||
],
|
||||
"warnings": ["Do not request actual charts with text - use abstract representations"]
|
||||
},
|
||||
"infographic": {
|
||||
"guidance": "Create multi-section infographic layouts with clear visual hierarchy. Use text overlay zones for information.",
|
||||
"best_practices": [
|
||||
"Multi-panel designs with distinct sections",
|
||||
"Clear visual hierarchy and organization",
|
||||
"Design with text overlay zones for each section",
|
||||
"Professional infographic aesthetic"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -471,8 +620,8 @@ def get_model_specific_guidance(model: Optional[str], image_type: Optional[str])
|
||||
model_lower = model.lower()
|
||||
image_type_lower = (image_type or "conceptual").lower()
|
||||
|
||||
# Get model guidance
|
||||
model_guidance = MODEL_SPECIFIC_GUIDANCE.get(model_lower, {})
|
||||
# Get model guidance (use _default for unknown models)
|
||||
model_guidance = MODEL_SPECIFIC_GUIDANCE.get(model_lower, MODEL_SPECIFIC_GUIDANCE.get("_default", {}))
|
||||
|
||||
# Get image type specific guidance
|
||||
type_guidance = model_guidance.get(image_type_lower, model_guidance.get("text_overlay", {}))
|
||||
@@ -480,63 +629,6 @@ def get_model_specific_guidance(model: Optional[str], image_type: Optional[str])
|
||||
return type_guidance
|
||||
|
||||
|
||||
def extract_visual_data(section: Dict[str, Any], research: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Sort section and research text into buckets useful for image prompts.

    Args:
        section: Blog section dict; the keys read are ``key_points``,
            ``subheadings`` and ``keywords``. A falsy value is skipped.
        research: Research dict; the keys read are ``key_facts`` (falling
            back to ``highlights``) and ``insights`` (falling back to
            ``summary``). A falsy value is skipped.

    Returns:
        Dict with the list-valued keys ``visual_keywords``, ``data_points``,
        ``concepts`` and ``statistics``.
    """
    trend_words = ("increase", "decrease", "growth", "trend", "pattern", "comparison")
    keywords_out: list = []
    data_points: list = []
    concepts: list = []
    statistics: list = []

    def mentions_number(text: str) -> bool:
        # Any digit at all marks the text as a statistic.
        return any(ch.isdigit() for ch in text)

    if section:
        # Only the first five key points are considered.
        for point in (section.get("key_points", []) or [])[:5]:
            if not isinstance(point, str):
                continue
            if mentions_number(point):
                statistics.append(point)
                continue
            lowered = point.lower()
            if any(word in lowered for word in trend_words):
                data_points.append(point)
            else:
                concepts.append(point)

        # The first three subheadings are treated as plain concepts.
        concepts.extend(
            sub for sub in (section.get("subheadings", []) or [])[:3]
            if isinstance(sub, str)
        )

        # Up to eight truthy keywords, coerced to strings.
        keywords_out.extend(
            str(kw) for kw in (section.get("keywords", []) or [])[:8] if kw
        )

    if research:
        facts = research.get("key_facts", []) or research.get("highlights", []) or []
        for fact in facts[:3]:
            if not isinstance(fact, str):
                continue
            (statistics if mentions_number(fact) else data_points).append(fact)

        insights = research.get("insights", []) or research.get("summary", "")
        if isinstance(insights, str) and insights:
            # Treat the first three period-separated chunks as concepts.
            for chunk in insights.split('.')[:3]:
                chunk = chunk.strip()
                if chunk:
                    concepts.append(chunk)
        elif isinstance(insights, list):
            concepts.extend(str(entry) for entry in insights[:3])

    return {
        "visual_keywords": keywords_out,
        "data_points": data_points,
        "concepts": concepts,
        "statistics": statistics,
    }
|
||||
|
||||
|
||||
@router.post("/suggest-prompts", response_model=ImagePromptSuggestResponse)
|
||||
def suggest_prompts(
|
||||
req: ImagePromptSuggestRequest,
|
||||
@@ -564,8 +656,18 @@ def suggest_prompts(
|
||||
industry = persona.get("industry", req.research.get("domain") if req.research else "your industry")
|
||||
tone = persona.get("tone", "professional, trustworthy")
|
||||
|
||||
# Extract visual-relevant data intelligently
|
||||
visual_data = extract_visual_data(section, req.research)
|
||||
# Extract visual-relevant data intelligently using the new module
|
||||
visual_data = _extract_visual_data(section, req.research)
|
||||
|
||||
# Get model recommendation based on content type
|
||||
model_recommendation = get_model_recommendation(visual_data)
|
||||
|
||||
# Build visual summary from extracted data
|
||||
visual_summary = build_visual_summary(visual_data)
|
||||
|
||||
# Add model recommendation to visual summary if available
|
||||
if model_recommendation:
|
||||
visual_summary += model_recommendation
|
||||
|
||||
schema = {
|
||||
"type": "object",
|
||||
@@ -620,19 +722,6 @@ def suggest_prompts(
|
||||
if model_warnings:
|
||||
provider_guidance += f"\n⚠️ WARNINGS:\n" + "\n".join([f"- {w}" for w in model_warnings])
|
||||
|
||||
# Build visual data summary from extracted data
|
||||
visual_summary_parts = []
|
||||
if visual_data["statistics"]:
|
||||
visual_summary_parts.append(f"Key Statistics: {', '.join(visual_data['statistics'][:3])}")
|
||||
if visual_data["data_points"]:
|
||||
visual_summary_parts.append(f"Data Points: {', '.join(visual_data['data_points'][:3])}")
|
||||
if visual_data["concepts"]:
|
||||
visual_summary_parts.append(f"Visual Concepts: {', '.join(visual_data['concepts'][:5])}")
|
||||
if visual_data["visual_keywords"]:
|
||||
visual_summary_parts.append(f"Keywords: {', '.join(visual_data['visual_keywords'][:8])}")
|
||||
|
||||
visual_summary = "\n".join(visual_summary_parts) if visual_summary_parts else ""
|
||||
|
||||
best_practices = (
|
||||
"BLOG IMAGE BEST PRACTICES: Create images optimized for blog content, not social media posters. "
|
||||
"Focus on: data visualization elements (charts, graphs, infographics), clean layouts with designated text overlay areas, "
|
||||
@@ -654,14 +743,15 @@ def suggest_prompts(
|
||||
else "Do not include on-image text, but still design with text overlay areas in mind for blog use."
|
||||
)
|
||||
|
||||
# Image type specific guidance
|
||||
# Image type specific guidance (enhanced with infographic type)
|
||||
image_type_guidance = {
|
||||
"realistic": "Photorealistic style with professional photography quality. Include camera settings and lighting details.",
|
||||
"chart": "⚠️ IMPORTANT: Complex infographics are too difficult for current AI models. Create simple visual representations with designated text overlay areas instead. Use abstract data visualization elements, not actual charts with embedded text.",
|
||||
"conceptual": "Abstract or conceptual imagery that represents the topic visually. Clean compositions with text overlay zones.",
|
||||
"diagram": "Technical diagrams with simple, clear visual elements. Design for text overlay areas, not embedded labels.",
|
||||
"illustration": "Stylized illustrations that support the content. Professional, clean aesthetic suitable for blog use.",
|
||||
"background": "Background images optimized for text overlays. Clean, uncluttered compositions with high-contrast text zones."
|
||||
"background": "Background images optimized for text overlays. Clean, uncluttered compositions with high-contrast text zones.",
|
||||
"infographic": "Multi-section infographic designs with clear visual hierarchy. Use designated areas for each data point or concept. Design with text overlay zones for information labels. Professional infographic aesthetics with clean, organized layouts."
|
||||
}.get(image_type, "General blog image guidance.")
|
||||
|
||||
# Build comprehensive prompt with visual data and model-specific guidance
|
||||
|
||||
22
backend/services/image_generation/__init__.py
Normal file
22
backend/services/image_generation/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
Image Generation Services.
|
||||
|
||||
This package provides services for AI-powered image generation,
|
||||
including visual data extraction and prompt optimization.
|
||||
"""
|
||||
|
||||
from .visual_data_extractor import (
|
||||
extract_visual_data,
|
||||
get_model_recommendation,
|
||||
build_visual_summary,
|
||||
ExtractedVisualData,
|
||||
DOMAIN_VISUAL_CONCEPTS,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"extract_visual_data",
|
||||
"get_model_recommendation",
|
||||
"build_visual_summary",
|
||||
"ExtractedVisualData",
|
||||
"DOMAIN_VISUAL_CONCEPTS",
|
||||
]
|
||||
570
backend/services/image_generation/visual_data_extractor.py
Normal file
570
backend/services/image_generation/visual_data_extractor.py
Normal file
@@ -0,0 +1,570 @@
|
||||
"""
|
||||
Visual Data Extractor for Image Generation Prompts.
|
||||
|
||||
This module provides intelligent extraction of visual-relevant data from blog sections
|
||||
and research data to generate contextually relevant image prompts.
|
||||
|
||||
Key Features:
|
||||
- Statistics extraction with regex patterns
|
||||
- Domain-specific visual concept detection
|
||||
- Research source mining for visual data
|
||||
- Deduplication and data cleaning
|
||||
"""
|
||||
|
||||
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
|
||||
# Pre-compiled regex patterns for performance
|
||||
_STATISTICAL_PATTERNS: List[Tuple[str, re.Pattern]] = [
|
||||
('percentage', re.compile(r'\d+[\d,]*%', re.IGNORECASE)),
|
||||
('currency', re.compile(r'\$[\d,]+(?:\.\d{2})?', re.IGNORECASE)),
|
||||
('multiplier', re.compile(r'\d+[\d,]*x', re.IGNORECASE)),
|
||||
('large_number', re.compile(r'\d+[\d,]*\s*(?:million|billion|thousand|trillion)s?', re.IGNORECASE)),
|
||||
('range', re.compile(r'\d+\s*-\s*\d+%', re.IGNORECASE)),
|
||||
('change_up', re.compile(r'up\s+by\s+\d+%', re.IGNORECASE)),
|
||||
('change_down', re.compile(r'down\s+by\s+\d+%', re.IGNORECASE)),
|
||||
('growth', re.compile(r'(?:increased|decreased|grew|declined)\s*[\d%]+', re.IGNORECASE)),
|
||||
('cagr', re.compile(r'cagr\s+of\s+[\d.]+%', re.IGNORECASE)),
|
||||
]
|
||||
|
||||
_VISUAL_DATA_PATTERNS: List[Tuple[str, re.Pattern]] = [
|
||||
('times', re.compile(r'\d+\s*(?:times|folds?)', re.IGNORECASE)),
|
||||
('ranking', re.compile(r'rank(?:ed|ing)?\s*(?:#?\d+|first|second|third|top|bottom)', re.IGNORECASE)),
|
||||
('comparison', re.compile(r'(?:vs|versus|compared\s+to|compared\s+with)', re.IGNORECASE)),
|
||||
('chart_mention', re.compile(r'(?:chart|graph|diagram|visual|infographic)', re.IGNORECASE)),
|
||||
('superlative', re.compile(r'(?:best|worst|leading|top|highest|lowest)', re.IGNORECASE)),
|
||||
]
|
||||
|
||||
_TREND_KEYWORDS: Set[str] = {
|
||||
'increase', 'decrease', 'growth', 'trend', 'pattern', 'comparison',
|
||||
'ranking', 'versus', 'vs', 'rise', 'fall', 'growth', 'decline',
|
||||
'surge', 'drop', 'climb', 'jump', 'plummet', 'soar', 'fluctuate'
|
||||
}
|
||||
|
||||
|
||||
# Domain-specific visual concepts mapping.
#
# Keys are domain identifiers; values are visual concept phrases for that
# domain. The order inside each list matters: _detect_domains_in_text uses
# the domain key plus the first five phrases as match keywords, and returns
# the first three phrases as suggested concepts for a detected domain.
# This mapping is also re-exported from the package __init__.
DOMAIN_VISUAL_CONCEPTS: Dict[str, List[str]] = {
    "tech": [
        "circuit board patterns", "digital interface", "data stream", "network nodes",
        "server racks", "silicon chips", "binary code", "cloud computing",
        "artificial intelligence", "machine learning model", "software code",
        "technology innovation", "digital transformation"
    ],
    "healthcare": [
        "stethoscope", "medical chart", "hospital equipment", "DNA helix",
        "heart rate monitor", "medical cross", "prescription", "patient care",
        "healthcare professional", "medical research", "wellness", "health metrics"
    ],
    "finance": [
        "stock chart", "dollar signs", "investment growth", "banking",
        "pie chart", "financial graph", "portfolio", "market trends",
        "cryptocurrency", "blockchain", "financial analysis", "wealth management"
    ],
    "marketing": [
        "digital marketing", "social media", "content strategy", "audience growth",
        "brand awareness", "conversion funnel", "engagement metrics", "ROI chart",
        "marketing analytics", "customer acquisition", "viral content"
    ],
    "education": [
        "classroom", "graduation cap", "books", "learning curve",
        "knowledge growth", "student achievement", "online learning", "curriculum",
        "educational technology", "academic success", "skill development"
    ],
    "ecommerce": [
        "shopping cart", "product display", "checkout flow", "conversion",
        "customer journey", "inventory", "shipping", "discount tags",
        "online store", "e-commerce analytics", "retail technology"
    ],
    "real_estate": [
        "building", "house", "property", "real estate market",
        "mortgage", "home ownership", "apartment complex", "construction",
        "property investment", "housing market", "architecture"
    ],
    "food": [
        "restaurant", "cooking", "ingredients", "food preparation",
        "recipe", "menu", "dining experience", "culinary arts",
        "gourmet", "food photography", "healthy eating"
    ],
    "travel": [
        "airplane", "destination", "map", "luggage", "passport",
        "tourist", "hotel", "beach resort", "adventure", "travel planning",
        "vacation", "world exploration"
    ],
    "fitness": [
        "gym", "workout", "exercise", "muscle", "weight loss",
        "nutrition", "running", "yoga", "healthy lifestyle", "fitness tracking",
        "sports training", "wellness"
    ],
    "fashion": [
        "clothing", "wardrobe", "style", "runway", "designer",
        "outfit", "accessories", "fashion trends", "personal style", "apparel"
    ],
    "entertainment": [
        "movie reel", "music note", "concert", "celebrity", "streaming",
        "gaming", "content creation", "media production", "creative arts", "performance"
    ],
    "business": [
        "office", "meeting", "presentation", "business growth", "strategy",
        "team collaboration", "enterprise", "corporate", "leadership", "productivity"
    ],
    "science": [
        "laboratory", "microscope", "experiment", "data analysis", "research",
        "scientific method", "discovery", "innovation", "technology development"
    ],
    "sports": [
        "stadium", "athlete", "scoreboard", "trophy", "team",
        "competition", "fitness", "championship", "sports analytics", "training"
    ],
    "legal": [
        "gavel", "courthouse", "legal documents", "scales of justice",
        "law books", "legal contract", "attorney", "courtroom", "compliance"
    ],
    "environmental": [
        "renewable energy", "solar panels", "wind turbines", "green technology",
        "sustainability", "climate change", "eco-friendly", "nature conservation"
    ],
}
|
||||
|
||||
|
||||
@dataclass
class ExtractedVisualData:
    """Accumulator for the visual signals pulled out of a section and its research.

    Every field is a list of strings that starts empty; the extraction
    helpers append to these lists in place.
    """

    visual_keywords: List[str] = field(default_factory=list)
    data_points: List[str] = field(default_factory=list)
    concepts: List[str] = field(default_factory=list)
    statistics: List[str] = field(default_factory=list)
    domain_concepts: List[str] = field(default_factory=list)
    visual_metaphors: List[str] = field(default_factory=list)
    detected_domains: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, List[str]]:
        """Return the fields as a plain dict.

        The returned dict references the same list objects as the instance;
        nothing is copied.
        """
        return dict(
            visual_keywords=self.visual_keywords,
            data_points=self.data_points,
            concepts=self.concepts,
            statistics=self.statistics,
            domain_concepts=self.domain_concepts,
            visual_metaphors=self.visual_metaphors,
            detected_domains=self.detected_domains,
        )

    def has_statistics(self) -> bool:
        """Return True when at least one statistic was collected."""
        return bool(self.statistics)

    def has_data_points(self) -> bool:
        """Return True when at least one data point was collected."""
        return bool(self.data_points)

    def has_domain_concepts(self) -> bool:
        """Return True when at least one domain concept was collected."""
        return bool(self.domain_concepts)

    def is_data_heavy(self) -> bool:
        """Return True when the content carries statistics or data points."""
        return self.has_statistics() or self.has_data_points()

    def get_recommended_image_type(self) -> str:
        """Suggest an image type for the collected data.

        Returns:
            ``"infographic"`` for data-bearing content that also has domain
            concepts, ``"chart"`` for data-bearing content without them, and
            ``"conceptual"`` for everything else.
        """
        if not (self.has_statistics() or self.has_data_points()):
            return "conceptual"
        if self.has_domain_concepts():
            return "infographic"
        return "chart"
|
||||
|
||||
|
||||
def _extract_statistic_with_context(text: str) -> Optional[str]:
    """Return the first statistic found in *text* together with nearby words.

    The patterns in ``_STATISTICAL_PATTERNS`` are tried in list order and the
    first pattern that matches anywhere in *text* wins (not necessarily the
    match that appears earliest in the text).

    Args:
        text: Input text to search.

    Returns:
        A snippet spanning up to 60 characters before the match through up to
        30 characters after it, or ``None`` when no pattern matches.
    """
    for _label, pattern in _STATISTICAL_PATTERNS:
        found = pattern.search(text)
        if found is None:
            continue

        window_start = max(0, found.start() - 60)
        window_end = min(len(text), found.end() + 30)
        snippet = text[window_start:window_end].strip()

        if window_start > 0:
            # The window began mid-text, so its first token may be a word
            # fragment: drop everything up to the first space when that space
            # occurs within the first 20 characters.
            cut = snippet.find(' ')
            if 0 < cut < 20:
                snippet = snippet[cut + 1:]
        return snippet

    return None
|
||||
|
||||
|
||||
def _has_visual_mention(text: str) -> bool:
    """Report whether *text* matches any pattern in ``_VISUAL_DATA_PATTERNS``.

    Args:
        text: Input text to check.

    Returns:
        True as soon as one pattern matches somewhere in *text*, else False.
    """
    return any(
        pattern.search(text) is not None
        for _label, pattern in _VISUAL_DATA_PATTERNS
    )
|
||||
|
||||
|
||||
def _has_trend_keyword(text: str) -> bool:
    """Check whether *text* contains a trend/comparison keyword.

    A keyword from ``_TREND_KEYWORDS`` counts only when it begins at a word
    boundary. Inflected forms still match ("increased", "drops", "vs."), but
    keywords buried inside unrelated words no longer do ("enterprise" does
    not count as "rise", "TVs" does not count as "vs").

    Args:
        text: Input text to check.

    Returns:
        True if any keyword starts a word in *text* (case-insensitive).
    """
    if not _TREND_KEYWORDS:
        # An empty alternation would match at every word boundary.
        return False

    # Sorting keeps the pattern string stable across calls so the compiled
    # regex is served from the ``re`` module's cache.
    alternatives = '|'.join(re.escape(word) for word in sorted(_TREND_KEYWORDS))
    return re.search(r'\b(?:' + alternatives + r')', text, re.IGNORECASE) is not None
|
||||
|
||||
|
||||
def _detect_domains_in_text(text: str) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Detect industry/domain from text and return relevant visual concepts.
|
||||
|
||||
Args:
|
||||
text: Input text to analyze
|
||||
|
||||
Returns:
|
||||
Tuple of (detected_domain_names, domain_concepts)
|
||||
"""
|
||||
text_lower = text.lower()
|
||||
detected_domains: List[str] = []
|
||||
all_concepts: List[str] = []
|
||||
|
||||
for domain, concepts in DOMAIN_VISUAL_CONCEPTS.items():
|
||||
# Check if domain name or any concept keyword is in text
|
||||
keywords_to_check = [domain] + concepts[:5]
|
||||
if any(keyword in text_lower for keyword in keywords_to_check):
|
||||
detected_domains.append(domain)
|
||||
# Add top 3 concepts for this domain
|
||||
all_concepts.extend(concepts[:3])
|
||||
|
||||
return detected_domains, list(set(all_concepts))
|
||||
|
||||
|
||||
def _deduplicate_and_limit(
|
||||
items: List[str],
|
||||
max_items: int = 10,
|
||||
key_length: int = 50
|
||||
) -> List[str]:
|
||||
"""
|
||||
Deduplicate items by normalized key and limit count.
|
||||
|
||||
Args:
|
||||
items: List of strings to deduplicate
|
||||
max_items: Maximum number of items to return
|
||||
key_length: Length of normalized key for comparison
|
||||
|
||||
Returns:
|
||||
Deduplicated list with max_items items
|
||||
"""
|
||||
seen: Set[str] = set()
|
||||
unique_items: List[str] = []
|
||||
|
||||
for item in items:
|
||||
if not item or not isinstance(item, str):
|
||||
continue
|
||||
normalized = item.lower().strip()[:key_length]
|
||||
if normalized and normalized not in seen and len(unique_items) < max_items:
|
||||
seen.add(normalized)
|
||||
unique_items.append(item.strip())
|
||||
|
||||
return unique_items
|
||||
|
||||
|
||||
def extract_visual_data(
    section: Optional[Dict[str, Any]],
    research: Optional[Dict[str, Any]]
) -> ExtractedVisualData:
    """
    Intelligently extract visual-relevant data from blog section and research.

    This function analyzes section headings, key points, subheadings, keywords,
    and research data to extract statistics, data points, visual concepts,
    and domain-specific visual metaphors.

    Args:
        section: Blog section dictionary with optional keys:
            - heading: Section title
            - subheadings: List of subheading strings
            - key_points: List of key point strings
            - keywords: List of keyword strings
            A falsy value (``None`` or ``{}``) skips section extraction.
        research: Research data dictionary with optional keys:
            - key_facts, highlights: List of fact strings
            - insights, summary: String or list of insight strings
            - sources, references: List of source dictionaries
            - keywords: Dict or list of keywords
            - domain, industry: Domain/industry string
            A falsy value skips research extraction.

    Returns:
        ExtractedVisualData dataclass with extracted information

    Example:
        >>> section = {
        ...     "heading": "AI in Healthcare",
        ...     "key_points": ["Market grew 40% in 2023", "Investment reached $5B"]
        ... }
        >>> result = extract_visual_data(section, None)
        >>> result.statistics
        ['Market grew 40% in 2023', 'Investment reached $5B']
        >>> sorted(result.domain_concepts)
        ['hospital equipment', 'medical chart', 'stethoscope']
    """
    result = ExtractedVisualData()

    # Phase 1: Extract from section
    if section:
        _extract_from_section(section, result)

    # Phase 2: Extract from research
    if research:
        _extract_from_research(research, result)

    # Phase 3: Deduplicate all extracted data (mutates ``result`` in place)
    _deduplicate_results(result)

    return result
|
||||
|
||||
|
||||
def _extract_from_section(section: Dict, result: ExtractedVisualData) -> None:
    """Extract visual data from a blog section into *result* (mutated in place).

    Reads the ``key_points``, ``subheadings``, ``keywords`` and ``heading``
    keys of *section*; missing or ``None`` values are treated as empty.
    Non-string entries are skipped.

    Args:
        section: Blog section dictionary.
        result: Accumulator whose lists are appended to.
    """

    def note_domains(text: str) -> None:
        # Record every domain (and its concepts) that the text mentions.
        domains, concepts = _detect_domains_in_text(text)
        result.detected_domains.extend(domains)
        result.domain_concepts.extend(concepts)

    # Key points: each one lands in exactly one bucket, statistics first.
    for point in (section.get("key_points", []) or [])[:10]:
        if not isinstance(point, str):
            continue

        stat = _extract_statistic_with_context(point)
        if stat:
            result.statistics.append(stat)
        elif _has_visual_mention(point) or _has_trend_keyword(point):
            result.data_points.append(point)
        else:
            result.concepts.append(point)

        # Domain detection runs for every key point regardless of its bucket,
        # so trend/comparison points contribute domain concepts as well.
        note_domains(point)

    # Subheadings are always treated as concepts.
    for subhead in (section.get("subheadings", []) or [])[:7]:
        if isinstance(subhead, str):
            result.concepts.append(subhead)
            note_domains(subhead)

    # Keywords: keep up to twelve non-empty strings.
    for kw in (section.get("keywords", []) or [])[:12]:
        if kw and isinstance(kw, str):
            result.visual_keywords.append(kw)

    # The heading feeds domain detection and becomes the leading concept.
    heading = section.get("heading", "")
    if heading and isinstance(heading, str):
        note_domains(heading)
        stripped_heading = heading.strip()
        if stripped_heading:
            result.concepts.insert(0, stripped_heading)
|
||||
|
||||
|
||||
def _extract_from_research(research: Dict, result: ExtractedVisualData) -> None:
    """Extract visual data from research data.

    Scans the free-form ``research`` mapping and appends what it finds to
    ``result`` in place:

    * ``key_facts`` / ``highlights``: each string becomes a statistic when
      ``_extract_statistic_with_context`` returns one, otherwise a data point.
    * ``insights`` / ``summary``: a string is split into sentences, a list is
      used as-is; each entry becomes a statistic or a concept.
    * ``sources`` / ``references``: titles and excerpts feed domain
      detection; each excerpt may add a statistic and adds one data point.
    * ``keywords`` and ``domain`` / ``industry``: domain detection only.

    At most seven entries are read from each list. Values of an unexpected
    type (for example a dict where a list is expected, or a non-string
    excerpt) are skipped instead of raising, because the payload comes from
    loosely structured JSON.

    Args:
        research: Research payload; every key is optional.
        result: Accumulator whose list fields are extended in place.

    Returns:
        None. All output is written to ``result``.
    """
    import re  # function-scope import keeps this block self-contained

    per_field_limit = 7
    excerpt_chars = 200          # default data-point length for an excerpt
    visual_excerpt_chars = 300   # wider window when the excerpt mentions a visual

    def take(value: object) -> list:
        """Return the first few items of a list/tuple, or [] for anything else."""
        if isinstance(value, (list, tuple)):
            return list(value[:per_field_limit])
        return []

    def note_domains(text: object) -> None:
        """Run domain detection on ``text`` when it is a string."""
        if not isinstance(text, str):
            return
        domains, concepts = _detect_domains_in_text(text)
        result.detected_domains.extend(domains)
        result.domain_concepts.extend(concepts)

    def note_stat_or(text: str, fallback: list) -> None:
        """Record ``text`` as a statistic, or append it to ``fallback``."""
        stat = _extract_statistic_with_context(text)
        if stat:
            result.statistics.append(stat)
        else:
            fallback.append(text)

    # Key facts / highlights: statistic if one is found, otherwise data point.
    for fact in take(research.get("key_facts") or research.get("highlights")):
        if isinstance(fact, str):
            note_stat_or(fact, result.data_points)

    # Insights / summary.
    insights = research.get("insights") or research.get("summary")
    if isinstance(insights, str):
        # Split only on a period that ends a sentence (followed by whitespace
        # or end of text). A plain split('.') would cut "44.9%" or "$1.5M"
        # in two and produce bogus statistics.
        for sentence in re.split(r"\.(?:\s+|$)", insights)[:per_field_limit]:
            sentence = sentence.strip()
            if sentence:
                note_stat_or(sentence, result.concepts)
    else:
        for insight in take(insights):
            if isinstance(insight, str):
                note_stat_or(insight, result.concepts)

    # Research sources / references.
    for source in take(research.get("sources") or research.get("references")):
        if not isinstance(source, dict):
            continue

        title = source.get("title")
        if title:
            note_domains(title)

        excerpt = (
            source.get("excerpt")
            or source.get("snippet")
            or source.get("description")
        )
        if not excerpt or not isinstance(excerpt, str):
            continue

        stat = _extract_statistic_with_context(excerpt)
        if stat:
            result.statistics.append(stat)

        # One data point per source: excerpts that mention a visual keep a
        # longer prefix. Appending both the 200- and 300-character prefixes
        # would add two near-identical entries that exact-match
        # de-duplication cannot merge.
        window = visual_excerpt_chars if _has_visual_mention(excerpt) else excerpt_chars
        result.data_points.append(excerpt[:window])

        note_domains(excerpt)

    # Research keywords: either {"primary_keywords"/"primary": [...]} or a list.
    keywords = research.get("keywords", {})
    if isinstance(keywords, dict):
        keywords = keywords.get("primary_keywords") or keywords.get("primary")
    for keyword in take(keywords):
        note_domains(keyword)

    # Explicit research domain / industry label.
    domain_label = research.get("domain") or research.get("industry")
    if domain_label:
        note_domains(domain_label)
|
||||
|
||||
|
||||
def _deduplicate_results(result: ExtractedVisualData) -> None:
    """Deduplicate all extracted data in place.

    Each list field on ``result`` is replaced by the output of
    ``_deduplicate_and_limit`` (second argument presumably the maximum number
    of entries to keep -- confirm against that helper). ``detected_domains``
    is de-duplicated without a cap.

    Args:
        result: Accumulator whose list fields are rewritten.

    Returns:
        None. ``result`` is modified in place.
    """
    result.visual_keywords = _deduplicate_and_limit(result.visual_keywords, 12)
    result.data_points = _deduplicate_and_limit(result.data_points, 10)
    result.concepts = _deduplicate_and_limit(result.concepts, 10)
    result.statistics = _deduplicate_and_limit(result.statistics, 10)
    result.domain_concepts = _deduplicate_and_limit(result.domain_concepts, 10)
    # dict.fromkeys keeps first-seen order. list(set(...)) would reorder the
    # domains differently between interpreter runs (string hash
    # randomization), making the generated prompt text non-deterministic.
    result.detected_domains = list(dict.fromkeys(result.detected_domains))
|
||||
|
||||
|
||||
def get_model_recommendation(visual_data: ExtractedVisualData) -> Optional[str]:
    """Suggest image models that suit the extracted content.

    Data-heavy content is checked first and takes precedence; domain-specific
    content is only considered when the data is not data-heavy.

    Args:
        visual_data: ExtractedVisualData instance.

    Returns:
        A recommendation paragraph (prefixed with two newlines so it can be
        appended to a prompt), or None when neither case applies.
    """
    if visual_data.is_data_heavy():
        advice = [
            "\n\nMODEL RECOMMENDATION: This section contains data/statistics. Consider using:",
            "- FLUX Kontext Pro: Best for data visualizations with text labels",
            "- GLM-Image: Excellent for infographics and educational diagrams",
            "- Ideogram V3 Turbo: Good for simple charts with text overlays",
        ]
        return "\n".join(advice)

    if not visual_data.has_domain_concepts():
        return None

    advice = [
        "\n\nMODEL RECOMMENDATION: This section covers domain-specific content. Consider using:",
        "- Qwen Image: Best for abstract conceptual imagery",
        "- FLUX Kontext Pro: Good for conceptual imagery with text support",
        "- FLUX 2 Flex: Excellent for poster-style conceptual designs",
    ]
    return "\n".join(advice)
|
||||
|
||||
|
||||
def build_visual_summary(visual_data: ExtractedVisualData) -> str:
    """Render the extracted visual data as a short multi-line summary.

    One line is produced per non-empty field, in a fixed order, each listing
    a capped number of comma-separated entries (detected domains are listed
    in full).

    Args:
        visual_data: ExtractedVisualData instance.

    Returns:
        Newline-joined summary for use in prompts; an empty string when
        every field is empty.
    """
    # (line prefix, attribute to read, max entries shown; None = show all)
    layout = (
        ("Key Statistics: ", "statistics", 3),
        ("Data Points: ", "data_points", 3),
        ("Visual Concepts: ", "concepts", 5),
        ("Keywords: ", "visual_keywords", 8),
        ("Domain Visual Concepts: ", "domain_concepts", 5),
        ("Detected Domains: ", "detected_domains", None),
    )

    lines = []
    for prefix, attr_name, cap in layout:
        entries = getattr(visual_data, attr_name)
        if not entries:
            continue
        shown = entries if cap is None else entries[:cap]
        lines.append(prefix + ", ".join(shown))

    return "\n".join(lines)
|
||||
221
docs/IMAGE_GENERATION_IMPROVEMENTS.md
Normal file
221
docs/IMAGE_GENERATION_IMPROVEMENTS.md
Normal file
@@ -0,0 +1,221 @@
|
||||
# Image Generation for Blog Writer - Technical Documentation
|
||||
|
||||
## Overview
|
||||
|
||||
This document describes the improvements made to image generation for the ALwrity Blog Writer feature, making generated images more relevant to blog content through intelligent visual data extraction and model selection.
|
||||
|
||||
## Architecture
|
||||
|
||||
### New Module Structure
|
||||
|
||||
```
|
||||
backend/services/image_generation/
|
||||
├── __init__.py # Package exports
|
||||
└── visual_data_extractor.py # Core extraction logic
|
||||
|
||||
backend/api/images.py # Updated to use new module
|
||||
```
|
||||
|
||||
### Key Components
|
||||
|
||||
1. **Visual Data Extractor** (`visual_data_extractor.py`)
|
||||
- Extracts statistics, data points, visual concepts, and domain-specific imagery
|
||||
- Pre-compiled regex patterns for performance
|
||||
- Domain detection across 16 industry verticals
|
||||
- Dataclass-based return type for type safety
|
||||
|
||||
2. **Model-Specific Guidance** (`images.py`)
|
||||
- Extended guidance for 5 models (Ideogram V3, FLUX Kontext Pro, Qwen Image, FLUX 2 Flex, GLM-Image)
|
||||
- Image type recommendations (infographic, chart, conceptual, etc.)
|
||||
- Content-based model selection
|
||||
|
||||
## Features
|
||||
|
||||
### 1. Statistics Extraction
|
||||
|
||||
**Patterns Supported:**
|
||||
- Percentages: `42%`, `1,000,000%`
|
||||
- Currency: `$500`, `$1.5M`
|
||||
- Multipliers: `5x`, `10x growth`
|
||||
- Large numbers: `million`, `billion`, `thousand`
|
||||
- Ranges: `20-30%`
|
||||
- Change indicators: `up by 30%`, `down by 15%`
|
||||
- CAGR: `CAGR of 44.9%`
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
section = {"key_points": ["Market grew 40% in 2023", "Investment reached $5 billion"]}
|
||||
result = extract_visual_data(section, None)
|
||||
# result.statistics = ["Market grew 40% in 2023", "Investment reached $5 billion"]
|
||||
```
|
||||
|
||||
### 2. Domain Detection
|
||||
|
||||
**Supported Domains (16):**
|
||||
- Tech (AI, cloud, software, digital transformation)
|
||||
- Healthcare (medical, hospital, patient care)
|
||||
- Finance (investment, banking, stock market)
|
||||
- Marketing (digital marketing, social media, ROI)
|
||||
- Education (learning, academic, curriculum)
|
||||
- E-commerce (shopping, conversion, inventory)
|
||||
- Real Estate (property, mortgage, housing)
|
||||
- Food (restaurant, cooking, recipe)
|
||||
- Travel (destination, adventure, vacation)
|
||||
- Fitness (workout, nutrition, wellness)
|
||||
- Fashion (clothing, style, designer)
|
||||
- Entertainment (streaming, gaming, content)
|
||||
- Business (enterprise, strategy, leadership)
|
||||
- Science (research, experiment, laboratory)
|
||||
- Sports (competition, training, championship)
|
||||
- Legal (compliance, contracts, courtroom)
|
||||
- Environmental (sustainability, renewable, eco-friendly)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
section = {"heading": "AI in Healthcare Market"}
|
||||
result = extract_visual_data(section, None)
|
||||
# result.detected_domains = ["healthcare", "tech"]
|
||||
# result.domain_concepts = ["stethoscope", "medical chart", "hospital equipment"]
|
||||
```
|
||||
|
||||
### 3. Visual Data Patterns
|
||||
|
||||
**Detected Patterns:**
|
||||
- Rankings: `ranked #1`, `top performer`, `leading brand`
|
||||
- Comparisons: `vs`, `versus`, `compared to`
|
||||
- Trends: `increase`, `decrease`, `growth`, `surge`
|
||||
- Multipliers: `5 times`, `3-fold`
|
||||
|
||||
### 4. Model Selection Recommendations
|
||||
|
||||
Based on extracted content type:
|
||||
|
||||
**For Data-Heavy Content (statistics/data points):**
|
||||
- FLUX Kontext Pro: Best for data visualizations with text labels
|
||||
- GLM-Image: Excellent for infographics and educational diagrams
|
||||
- Ideogram V3 Turbo: Good for simple charts with text overlays
|
||||
|
||||
**For Domain-Specific Content:**
|
||||
- Qwen Image: Best for abstract conceptual imagery
|
||||
- FLUX Kontext Pro: Good for conceptual imagery with text support
|
||||
- FLUX 2 Flex: Excellent for poster-style conceptual designs
|
||||
|
||||
## API Integration
|
||||
|
||||
### Endpoint: `POST /api/images/suggest-prompts`
|
||||
|
||||
**Request Body:**
|
||||
```json
|
||||
{
|
||||
"provider": "wavespeed",
|
||||
"model": "flux-kontext-pro",
|
||||
"image_type": "infographic",
|
||||
"title": "AI in Healthcare Market",
|
||||
"section": {
|
||||
"heading": "Market Growth",
|
||||
"subheadings": ["Statistics", "Key Players"],
|
||||
"key_points": ["Market grew 40% in 2023", "Investment reached $5B"]
|
||||
},
|
||||
"research": {
|
||||
"domain": "healthcare",
|
||||
"key_facts": ["CAGR of 44.9% projected"]
|
||||
},
|
||||
"persona": {
|
||||
"audience": "healthcare professionals",
|
||||
"tone": "professional"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"suggestions": [
|
||||
{
|
||||
"prompt": "Professional infographic showing AI healthcare market growth...",
|
||||
"negative_prompt": "blurry, distorted, text artifacts...",
|
||||
"width": 1024,
|
||||
"height": 1024,
|
||||
"overlay_text": "40% Growth"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Usage Example
|
||||
|
||||
```python
|
||||
from services.image_generation import extract_visual_data, build_visual_summary, get_model_recommendation
|
||||
|
||||
# Extract visual data from blog section and research
|
||||
section = {
|
||||
"heading": "Digital Marketing Trends 2024",
|
||||
"key_points": [
|
||||
"Social media engagement up 60% YoY",
|
||||
"Video content drives 3x more engagement",
|
||||
"ROI increased by 45% with personalized campaigns"
|
||||
],
|
||||
"keywords": ["marketing", "social media", "ROI"]
|
||||
}
|
||||
|
||||
research = {
|
||||
"domain": "marketing",
|
||||
"sources": [
|
||||
{
|
||||
"title": "Marketing Trends Report 2024",
|
||||
"excerpt": "Digital ad spend reached $50 billion, up 25% from last year."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Extract visual data
|
||||
result = extract_visual_data(section, research)
|
||||
|
||||
# Access extracted data
|
||||
print(f"Statistics: {result.statistics}")
|
||||
print(f"Domain: {result.detected_domains}")
|
||||
print(f"Concepts: {result.domain_concepts}")
|
||||
|
||||
# Get model recommendation
|
||||
rec = get_model_recommendation(result)
|
||||
print(f"Recommendation: {rec}")
|
||||
|
||||
# Build summary for prompt
|
||||
summary = build_visual_summary(result)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
**Unit Tests:** `backend/tests/services/test_visual_data_extractor.py`
|
||||
|
||||
Run tests:
|
||||
```bash
|
||||
cd backend
|
||||
pytest tests/services/test_visual_data_extractor.py -v
|
||||
```
|
||||
|
||||
**Test Coverage:**
|
||||
- Statistics extraction (8 tests)
|
||||
- Visual mention detection (5 tests)
|
||||
- Trend keyword detection (4 tests)
|
||||
- Domain detection (6 tests)
|
||||
- Deduplication (5 tests)
|
||||
- Main extraction function (8 tests)
|
||||
- Model recommendations (3 tests)
|
||||
- Visual summary building (3 tests)
|
||||
- Integration tests (3 tests)
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
1. **Pre-compiled Regex Patterns**: All regex patterns are compiled once at module load time, not on each function call.
|
||||
|
||||
2. **Deduplication**: Results are deduplicated using normalized keys to prevent duplicate entries.
|
||||
|
||||
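3. **Bounded Input Processing**: Only the fields the extractor needs are read, and each list is capped (for example, the first 10 key points and the first 7 research sources), so extraction cost stays small regardless of payload size.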
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Additional Domains**: Support for more industry verticals
|
||||
2. **Custom Visual Metaphors**: Allow users to define domain-specific visual concepts
|
||||
3. **A/B Testing**: Compare image relevance across different prompt strategies
|
||||
4. **Feedback Loop**: Use image selection data to improve future prompt generation
|
||||
@@ -9,7 +9,7 @@ import InfoIcon from '@mui/icons-material/Info';
|
||||
import { useImageGeneration, ImageGenerationRequest, fetchPromptSuggestions } from './useImageGeneration';
|
||||
|
||||
type Provider = 'huggingface' | 'stability' | 'wavespeed';
|
||||
type ImageType = 'realistic' | 'chart' | 'conceptual' | 'diagram' | 'illustration' | 'background';
|
||||
type ImageType = 'realistic' | 'chart' | 'conceptual' | 'diagram' | 'illustration' | 'background' | 'infographic';
|
||||
|
||||
interface ImageGeneratorProps {
|
||||
defaultProvider?: Provider;
|
||||
|
||||
Reference in New Issue
Block a user