AI Analysis and Content Strategy fixes. Enhanced Strategy Routes refactoring.

This commit is contained in:
ajaysi
2026-01-10 19:32:50 +05:30
parent 0b63ae7fc1
commit 8193cdba67
298 changed files with 45678 additions and 10952 deletions

View File

@@ -31,7 +31,7 @@ logger = get_service_logger("api.images")
class ImageGenerateRequest(BaseModel):
prompt: str
negative_prompt: Optional[str] = None
provider: Optional[str] = Field(None, pattern="^(gemini|huggingface|stability)$")
provider: Optional[str] = Field(None, pattern="^(gemini|huggingface|stability|wavespeed)$")
model: Optional[str] = None
width: Optional[int] = Field(default=1024, ge=64, le=2048)
height: Optional[int] = Field(default=1024, ge=64, le=2048)
@@ -246,7 +246,10 @@ def generate(
# Non-blocking: log error but don't fail the request
logger.error(f"[images.generate] ❌ Failed to track usage: {usage_error}", exc_info=True)
return ImageGenerateResponse(
# Create response with explicit success field
# Note: Asset saving and usage tracking are non-blocking and won't affect this response
response = ImageGenerateResponse(
success=True,
image_base64=image_b64,
image_url=image_url,
width=result.width,
@@ -255,6 +258,11 @@ def generate(
model=result.model,
seed=result.seed,
)
logger.info(f"[images.generate] ✅ Returning successful response: provider={result.provider}, model={result.model}, size={len(image_b64)} chars")
# Return response immediately - any post-processing errors won't affect the response
return response
except Exception as inner:
last_error = inner
logger.error(f"Image generation attempt {attempt+1} failed: {inner}")
@@ -282,7 +290,9 @@ class PromptSuggestion(BaseModel):
class ImagePromptSuggestRequest(BaseModel):
provider: Optional[str] = Field(None, pattern="^(gemini|huggingface|stability)$")
provider: Optional[str] = Field(None, pattern="^(gemini|huggingface|stability|wavespeed)$")
model: Optional[str] = None # Specific model (e.g., "qwen-image", "ideogram-v3-turbo")
image_type: Optional[str] = Field(None, pattern="^(realistic|chart|conceptual|diagram|illustration|background)$")
title: Optional[str] = None
section: Optional[Dict[str, Any]] = None
research: Optional[Dict[str, Any]] = None
@@ -315,6 +325,218 @@ class ImageEditResponse(BaseModel):
seed: Optional[int] = None
# Model-specific guidance for prompt optimization
#
# Lookup table consumed by get_model_specific_guidance():
#   level 1 key: model identifier, all lower-case (the lookup lower-cases the
#                requested model name before indexing)
#   level 2 key: image type ("realistic", "chart", "conceptual", "diagram",
#                "illustration", "background") or "text_overlay"; the
#                "text_overlay" entry doubles as the fallback when a model has
#                no entry for the requested image type
#   level 3:     "guidance" (str) and "best_practices" (list of str) are always
#                present; "warnings" (list of str) and
#                "negative_prompt_additions" (str) are optional and are read
#                with a default by suggest_prompts()
MODEL_SPECIFIC_GUIDANCE = {
# Ideogram V3 Turbo: no "diagram", "illustration" or "background" entries,
# so those image types fall back to "text_overlay".
"ideogram-v3-turbo": {
"text_overlay": {
"guidance": "Ideogram V3 excels at rendering readable text. Use simple, bold text (max 3-5 words). Avoid complex infographics - instead create clean backgrounds with designated text areas.",
"best_practices": [
"Use high contrast areas (top 20% or bottom 20%) for text placement",
"Keep text simple: headlines, statistics, or short phrases only",
"Avoid rendering text as part of complex graphics",
"Design with 'text overlay zones' in mind, not embedded text"
],
"negative_prompt_additions": "complex infographics, detailed charts with text, busy data visualizations"
},
"realistic": {
"guidance": "Photorealistic generation with professional quality. Include camera settings and lighting cues.",
"best_practices": [
"Include camera settings: '50mm lens, f/2.8, professional photography'",
"Specify lighting: 'natural lighting, soft shadows, rim light'",
"Add quality descriptors: 'high quality, detailed, sharp focus'"
]
},
"chart": {
"guidance": "Simple bar charts or pie charts with minimal text. Use high contrast areas for labels.",
"best_practices": [
"Avoid complex infographics - use simple visual representations",
"Design with text overlay zones, not embedded text",
"Use abstract data visualization elements"
],
"warnings": ["Complex infographics are too difficult - use simple charts or conceptual representations"]
},
"conceptual": {
"guidance": "Conceptual imagery with photorealistic elements. Clean compositions with text overlay areas.",
"best_practices": [
"Focus on visual metaphors and abstract concepts",
"Design with text overlay zones in mind (top/bottom 30%)",
"Use simple, clear compositions"
]
}
},
# FLUX Kontext Pro: no "background" entry, so that image type falls back to
# "text_overlay". Its "negative_prompt_additions" is deliberately empty.
"flux-kontext-pro": {
"text_overlay": {
"guidance": "FLUX Kontext Pro excels at typography and text rendering with improved prompt adherence. Best for professional designs with text elements.",
"best_practices": [
"Excellent for images requiring clear, readable text",
"Superior typography rendering compared to other models",
"Improved prompt adherence for consistent results",
"Can handle text in various styles and sizes",
"Best for professional blog images with embedded text or typography"
],
"negative_prompt_additions": ""
},
"realistic": {
"guidance": "Photorealistic generation with professional typography support. Include text elements naturally in the composition.",
"best_practices": [
"Can render text elements within realistic scenes",
"Include typography naturally in the design",
"Specify text style, size, and placement in prompts",
"Use for professional designs requiring text integration"
]
},
"chart": {
"guidance": "Excellent for data visualizations with text labels. Can render simple charts with clear typography.",
"best_practices": [
"Can render charts with text labels effectively",
"Use for data visualizations requiring clear typography",
"Specify chart type and label requirements clearly",
"Design with text integration in mind"
],
"warnings": ["Complex infographics may still be challenging - start with simple charts"]
},
"diagram": {
"guidance": "Technical diagrams with clear text labels. Excellent typography for professional diagrams.",
"best_practices": [
"Can render diagrams with embedded text labels",
"Specify text requirements clearly in prompts",
"Use for technical illustrations requiring typography",
"Design with text integration as a core element"
]
},
"illustration": {
"guidance": "Stylized illustrations with typography support. Professional designs with text elements.",
"best_practices": [
"Can integrate text naturally into illustrations",
"Specify typography style and placement",
"Use for professional blog illustrations with text",
"Design with text as a design element"
]
},
"conceptual": {
"guidance": "Conceptual imagery with typography capabilities. Can include text elements naturally.",
"best_practices": [
"Can integrate text into conceptual designs",
"Use for abstract concepts with text support",
"Specify text requirements in prompts",
"Design with typography as a visual element"
]
}
},
# Qwen Image: no "realistic", "diagram" or "illustration" entries, so those
# image types fall back to "text_overlay".
"qwen-image": {
"text_overlay": {
"guidance": "Qwen Image does NOT render readable text well. Design for text overlay areas only - never ask for text in the image itself.",
"best_practices": [
"Create clean backgrounds with high-contrast safe zones",
"Design simple compositions with space for text (top/bottom 30%)",
"Use abstract or conceptual imagery that supports text",
"NEVER request text, words, or labels in the image"
],
"negative_prompt_additions": "text, words, letters, numbers, labels, captions, infographics with text"
},
"conceptual": {
"guidance": "Best for abstract concepts, simple diagrams, and background imagery.",
"best_practices": [
"Focus on visual metaphors and abstract representations",
"Use simple compositions with clear focal points",
"Avoid complex details or fine textures"
]
},
"chart": {
"guidance": "Abstract representation of data - avoid actual charts. Use shapes, colors, and patterns to represent data concepts.",
"best_practices": [
"Create visual metaphors for data, not actual charts",
"Use abstract patterns and shapes",
"Design with text overlay zones for data labels"
],
"warnings": ["Do not request actual charts with text - use abstract representations instead"]
},
"background": {
"guidance": "Perfect for background images with text overlay areas. Clean, simple compositions.",
"best_practices": [
"Focus on clean backgrounds with designated text zones",
"Use simple, uncluttered compositions",
"High contrast areas for text placement"
]
}
}
}
def get_model_specific_guidance(model: Optional[str], image_type: Optional[str]) -> Dict[str, Any]:
    """Look up the prompt guidance entry for a model / image-type pair.

    Args:
        model: Model identifier; matched case-insensitively against the keys
            of ``MODEL_SPECIFIC_GUIDANCE``.
        image_type: Requested image type; treated as ``"conceptual"`` when
            missing or empty, and matched case-insensitively.

    Returns:
        The guidance dict for the requested image type, the model's
        ``"text_overlay"`` entry when that type has no entry of its own, or an
        empty dict when no model is given or the model is unknown.
    """
    # Without a model there is nothing to specialise on.
    if not model:
        return {}

    model_key = model.lower()
    type_key = (image_type or "conceptual").lower()

    per_model = MODEL_SPECIFIC_GUIDANCE.get(model_key, {})
    if type_key in per_model:
        return per_model[type_key]

    # Unknown image type for this model: use its generic text-overlay advice.
    return per_model.get("text_overlay", {})
def _first_items(value: Any, limit: int) -> list[Any]:
    """Return at most *limit* leading items of *value*, tolerating odd shapes.

    Lists and tuples are sliced; a dict becomes ``"key: value"`` strings; a
    non-empty bare string counts as a single item (instead of being sliced
    into characters); anything else yields an empty list.
    """
    if isinstance(value, (list, tuple)):
        return list(value[:limit])
    if isinstance(value, dict):
        return [f"{k}: {v}" for k, v in list(value.items())[:limit]]
    if isinstance(value, str) and value:
        return [value]
    return []


def extract_visual_data(section: Dict[str, Any], research: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Intelligently extract visual-relevant data from section and research.

    Args:
        section: Blog section payload. The keys read are ``key_points``
            (first 5), ``subheadings`` (first 3) and ``keywords`` (first 8).
            May be empty or ``None``.
        research: Optional research payload. The keys read are ``key_facts``
            (falling back to ``highlights``; first 3) and ``insights``
            (falling back to ``summary``).

    Returns:
        A dict with four lists of strings:
        ``visual_keywords``, ``data_points``, ``concepts`` and ``statistics``.

    Payload values are accepted as a list/tuple, a dict (rendered as
    ``"key: value"`` strings) or a single string; any other shape is ignored
    rather than raising, so a malformed payload cannot abort prompt
    suggestion.
    """
    trend_words = ("increase", "decrease", "growth", "trend", "pattern", "comparison")

    visual_keywords: list[str] = []
    data_points: list[str] = []
    concepts: list[str] = []
    statistics: list[str] = []

    # Extract from section
    if section:
        # Key points: anything containing a digit is treated as a statistic,
        # trend/comparison wording as a data point, the rest as a concept.
        for point in _first_items(section.get("key_points"), 5):
            if not isinstance(point, str):
                continue
            if any(ch.isdigit() for ch in point):
                statistics.append(point)
            elif any(word in point.lower() for word in trend_words):
                data_points.append(point)
            else:
                concepts.append(point)

        # Subheadings that suggest visuals
        concepts.extend(
            subhead
            for subhead in _first_items(section.get("subheadings"), 3)
            if isinstance(subhead, str)
        )

        # Keywords (falsy entries such as "" or None are dropped)
        visual_keywords.extend(
            str(keyword) for keyword in _first_items(section.get("keywords"), 8) if keyword
        )

    # Extract from research
    if research:
        # Key facts may arrive as a list or as a mapping of label -> value.
        raw_facts = research.get("key_facts") or research.get("highlights")
        for fact in _first_items(raw_facts, 3):
            if not isinstance(fact, str):
                continue
            if any(ch.isdigit() for ch in fact):
                statistics.append(fact)
            else:
                data_points.append(fact)

        # Research insights: free text is cut into its first three sentences,
        # a list contributes its first three entries.
        insights = research.get("insights", []) or research.get("summary", "")
        if isinstance(insights, str) and insights:
            sentences = insights.split('.')[:3]
            concepts.extend(s.strip() for s in sentences if s.strip())
        elif isinstance(insights, list):
            concepts.extend(str(item) for item in insights[:3])

    return {
        "visual_keywords": visual_keywords,
        "data_points": data_points,
        "concepts": concepts,
        "statistics": statistics,
    }
@router.post("/suggest-prompts", response_model=ImagePromptSuggestResponse)
def suggest_prompts(
req: ImagePromptSuggestRequest,
@@ -322,6 +544,9 @@ def suggest_prompts(
) -> ImagePromptSuggestResponse:
try:
provider = (req.provider or ("gemini" if (os.getenv("GPT_PROVIDER") or "").lower().startswith("gemini") else "huggingface")).lower()
model = req.model or None
image_type = req.image_type or "conceptual"
section = req.section or {}
title = (req.title or section.get("heading") or "").strip()
subheads = section.get("subheadings", []) or []
@@ -338,6 +563,9 @@ def suggest_prompts(
audience = persona.get("audience", "content creators and digital marketers")
industry = persona.get("industry", req.research.get("domain") if req.research else "your industry")
tone = persona.get("tone", "professional, trustworthy")
# Extract visual-relevant data intelligently
visual_data = extract_visual_data(section, req.research)
schema = {
"type": "object",
@@ -368,52 +596,129 @@ def suggest_prompts(
"Return STRICT JSON matching the provided schema, no extra text."
)
provider_guidance = {
# Get model-specific guidance
model_guidance_data = get_model_specific_guidance(model, image_type)
model_guidance_text = model_guidance_data.get("guidance", "")
model_best_practices = model_guidance_data.get("best_practices", [])
model_warnings = model_guidance_data.get("warnings", [])
negative_prompt_additions = model_guidance_data.get("negative_prompt_additions", "")
# Build provider guidance with model-specific details
provider_guidance_base = {
"huggingface": "Photorealistic Flux 1 Krea Dev; include camera/lighting cues (e.g., 50mm, f/2.8, rim light).",
"gemini": "Editorial, brand-safe, crisp edges, balanced lighting; avoid artifacts.",
"stability": "SDXL coherent details, sharp focus, cinematic contrast; readable text if present."
"stability": "SDXL coherent details, sharp focus, cinematic contrast; readable text if present.",
"wavespeed": "Blog-optimized imagery: focus on data visualization, infographics, clean layouts with text overlay areas, professional diagrams, charts, or conceptual illustrations. Avoid random people or poster-style images. Prefer clean backgrounds suitable for text overlays, data representations, or abstract concepts that support the blog content."
}.get(provider, "")
# Combine provider and model-specific guidance
provider_guidance = provider_guidance_base
if model_guidance_text:
provider_guidance = f"{provider_guidance_base}\n\nMODEL-SPECIFIC GUIDANCE ({model}): {model_guidance_text}"
if model_best_practices:
provider_guidance += f"\nBest Practices:\n" + "\n".join([f"- {bp}" for bp in model_best_practices])
if model_warnings:
provider_guidance += f"\n⚠️ WARNINGS:\n" + "\n".join([f"- {w}" for w in model_warnings])
# Build visual data summary from extracted data
visual_summary_parts = []
if visual_data["statistics"]:
visual_summary_parts.append(f"Key Statistics: {', '.join(visual_data['statistics'][:3])}")
if visual_data["data_points"]:
visual_summary_parts.append(f"Data Points: {', '.join(visual_data['data_points'][:3])}")
if visual_data["concepts"]:
visual_summary_parts.append(f"Visual Concepts: {', '.join(visual_data['concepts'][:5])}")
if visual_data["visual_keywords"]:
visual_summary_parts.append(f"Keywords: {', '.join(visual_data['visual_keywords'][:8])}")
visual_summary = "\n".join(visual_summary_parts) if visual_summary_parts else ""
best_practices = (
"Best Practices: one clear focal subject; clean, uncluttered background; rule-of-thirds or center-weighted composition; "
"text-safe margins if overlay text is included; neutral lighting if unsure; realistic skin tones; avoid busy patterns; "
"no brand logos or watermarks; no copyrighted characters; avoid low-res, blur, noise, banding, oversaturation, over-sharpening; "
"ensure hands and text are coherent if present; prefer 1024px+ on shortest side for quality."
"BLOG IMAGE BEST PRACTICES: Create images optimized for blog content, not social media posters. "
"Focus on: data visualization elements (charts, graphs, infographics), clean layouts with designated text overlay areas, "
"professional diagrams, conceptual illustrations, or abstract representations of the topic. "
"Avoid: random people posing, poster-style compositions, busy social media graphics, or trying to recreate text/words as images. "
"Instead: use clean backgrounds, simple compositions, areas reserved for text overlays, data-driven visuals, or conceptual imagery. "
"Technical: one clear focal subject; clean, uncluttered background; text-safe margins (20% padding on all sides for overlays); "
"neutral or professional lighting; avoid busy patterns; no brand logos or watermarks; no copyrighted characters; "
"avoid low-res, blur, noise, banding, oversaturation, over-sharpening; prefer 1024px+ on shortest side for quality."
)
# Harvest a few concise facts from research if available
facts: list[str] = []
try:
if req.research:
# try common shapes used in research service
top_stats = req.research.get("key_facts") or req.research.get("highlights") or []
if isinstance(top_stats, list):
facts = [str(x) for x in top_stats[:3]]
elif isinstance(top_stats, dict):
facts = [f"{k}: {v}" for k, v in list(top_stats.items())[:3]]
except Exception:
facts = []
facts_line = ", ".join(facts) if facts else ""
overlay_hint = "Include an on-image short title or fact if it improves communication; ensure clean, high-contrast safe area for text." if (req.include_overlay is None or req.include_overlay) else "Do not include on-image text."
overlay_hint = (
"IMPORTANT FOR BLOG IMAGES: Design images with text overlay areas in mind. "
"Include space for headlines, captions, or data labels. "
"Suggest overlay_text (short title or key statistic, <= 8 words) that would work well as a text overlay. "
"Ensure clean, high-contrast safe areas (top 20% or bottom 20% of image) for text placement. "
"The image should complement text, not replace it - think data visualization, infographics, or clean conceptual imagery."
if (req.include_overlay is None or req.include_overlay)
else "Do not include on-image text, but still design with text overlay areas in mind for blog use."
)
# Image type specific guidance
image_type_guidance = {
"realistic": "Photorealistic style with professional photography quality. Include camera settings and lighting details.",
"chart": "⚠️ IMPORTANT: Complex infographics are too difficult for current AI models. Create simple visual representations with designated text overlay areas instead. Use abstract data visualization elements, not actual charts with embedded text.",
"conceptual": "Abstract or conceptual imagery that represents the topic visually. Clean compositions with text overlay zones.",
"diagram": "Technical diagrams with simple, clear visual elements. Design for text overlay areas, not embedded labels.",
"illustration": "Stylized illustrations that support the content. Professional, clean aesthetic suitable for blog use.",
"background": "Background images optimized for text overlays. Clean, uncluttered compositions with high-contrast text zones."
}.get(image_type, "General blog image guidance.")
# Build comprehensive prompt with visual data and model-specific guidance
prompt = f"""
Provider: {provider}
Model: {model or 'auto-selected'}
Image Type: {image_type}
Title: {title}
Subheadings: {', '.join(subheads[:5])}
Key Points: {', '.join(key_points[:5])}
Keywords: {', '.join([str(k) for k in keywords[:8]])}
Research Facts: {facts_line}
VISUAL DATA EXTRACTED FROM CONTENT:
{visual_summary if visual_summary else f"Subheadings: {', '.join(subheads[:5])}\nKey Points: {', '.join(key_points[:5])}\nKeywords: {', '.join([str(k) for k in keywords[:8]])}"}
CONTEXT:
Audience: {audience}
Industry: {industry}
Tone: {tone}
Craft prompts that visually reflect this exact section (not generic blog topic). {provider_guidance}
BLOG IMAGE GENERATION TASK: Create image prompts optimized for blog content, NOT social media posters.
PROVIDER & MODEL GUIDANCE:
{provider_guidance}
IMAGE TYPE GUIDANCE:
{image_type_guidance}
BEST PRACTICES:
{best_practices}
TEXT OVERLAY GUIDANCE:
{overlay_hint}
Include a suitable negative_prompt where helpful. Suggest width/height when relevant (e.g., 1024x1024 or 1920x1080).
If including on-image text, return it in overlay_text (short: <= 8 words).
PROMPT GENERATION INSTRUCTIONS:
Generate 3-5 diverse, well-formed prompt variations that:
1. Intelligently use the visual data provided above (statistics, data points, concepts, keywords)
2. Focus on the most visually-relevant elements from the section subheadings, key points, and research
3. Create prompts that are optimized for the selected image type ({image_type})
4. Follow model-specific best practices and avoid model limitations
5. Include clean backgrounds suitable for text overlays
6. Avoid random people, poster compositions, or trying to render text as images
7. Support the blog section's content with relevant visual metaphors or data representations
8. Are optimized for blog article use (not social media)
PROMPT QUALITY REQUIREMENTS:
- Each prompt should be specific and detailed (50-100 words)
- Use the visual data intelligently - prioritize statistics and data points for charts, concepts for conceptual images
- Include visual composition guidance (layout, colors, style)
- Specify lighting and quality descriptors when appropriate
- Make prompts actionable and clear for the AI model
NEGATIVE PROMPT:
Include a suitable negative_prompt that excludes: people posing, social media graphics, posters, text rendered as images, busy compositions, watermarks, logos{f", {negative_prompt_additions}" if negative_prompt_additions else ""}.
DIMENSIONS:
Suggest width/height when relevant (e.g., 1024x1024 for square, 1920x1080 for landscape blog headers).
OVERLAY TEXT:
If including overlay text suggestion, return it in overlay_text (short: <= 8 words, typically a key statistic or section title). Use statistics from the visual data when available.
"""
# Get user_id for llm_text_gen subscription check (required)