story writer backend migration complete, Blog writer SEO and story writer backend migration complete, Blog writer SEO and story writer frontend migration complete

This commit is contained in:
ajaysi
2025-11-13 16:14:26 +05:30
parent 7191c7e7f0
commit 3b9356e2c8
124 changed files with 20055 additions and 1208 deletions

View File

@@ -0,0 +1,186 @@
"""
Introduction Generator - Generates varied blog introductions based on content and research.
Generates 3 different introduction options for the user to choose from.
"""
from typing import Dict, Any, List
from loguru import logger
from models.blog_models import BlogResearchResponse, BlogOutlineSection
class IntroductionGenerator:
    """Generates blog introductions using research and content data.

    The generator builds a prompt from the blog title, keywords, research
    angles and the already-written section content, asks the LLM for exactly
    three introductions, then validates the answer and pads it so callers
    always receive three options.
    """

    # Number of introduction options always returned to the caller.
    _NUM_INTRODUCTIONS = 3
    # Candidates shorter than this many characters are treated as noise.
    _MIN_INTRO_CHARS = 50
    # The prompt asks for 80-120 *words*, so the upper bound must be measured
    # in words. The previous 200-*character* ceiling rejected every
    # introduction that actually followed the prompt.
    _MAX_INTRO_WORDS = 200
    # JSON Schema minLength/maxLength count characters, not words. The upper
    # bound leaves room for ~200 words at roughly 6 characters per word.
    _SCHEMA_MIN_CHARS = 80
    _SCHEMA_MAX_CHARS = 1200
    # Keys under which the provider may wrap the array, in priority order.
    _RESPONSE_KEYS = ('introductions', 'options', 'intros', 'response')

    def __init__(self):
        """Initialize the introduction generator (no state is kept)."""
        pass

    def build_introduction_prompt(
        self,
        blog_title: str,
        research: BlogResearchResponse,
        outline: List[BlogOutlineSection],
        sections_content: Dict[str, str],
        primary_keywords: List[str],
        search_intent: str
    ) -> str:
        """Build a prompt for generating blog introductions.

        Args:
            blog_title: The blog post title.
            research: Research data; only ``suggested_angles`` is read here.
            outline: Blog outline sections; the first three are summarized.
            sections_content: Mapping of section ID to generated content.
            primary_keywords: Primary keywords for the blog.
            search_intent: Search intent label inserted verbatim.

        Returns:
            The complete prompt text.
        """
        content_angles = research.suggested_angles or []
        sections_content = sections_content or {}

        # Summarize the first few sections that already have content. The
        # number shown is the section's position in the outline, so sections
        # without content leave a gap in the numbering.
        section_summaries = []
        for i, section in enumerate((outline or [])[:3], 1):
            content = sections_content.get(section.id, '')
            if not content:
                continue
            # Keep the prompt compact: first 200 characters per section.
            summary = content[:200] + '...' if len(content) > 200 else content
            section_summaries.append(f"{i}. {section.heading}: {summary}")

        sections_text = '\n'.join(section_summaries) if section_summaries else "Content sections are being generated."
        primary_kw_text = ', '.join(primary_keywords) if primary_keywords else "the topic"
        content_angle_text = ', '.join(content_angles[:3]) if content_angles else "General insights"

        return f"""Generate exactly 3 varied blog introductions for the following blog post.
BLOG TITLE: {blog_title}
PRIMARY KEYWORDS: {primary_kw_text}
SEARCH INTENT: {search_intent}
CONTENT ANGLES: {content_angle_text}
BLOG CONTENT SUMMARY:
{sections_text}
REQUIREMENTS FOR EACH INTRODUCTION:
- 80-120 words in length
- Hook the reader immediately with a compelling opening
- Clearly state the value proposition and what readers will learn
- Include the primary keyword naturally within the first 2 sentences
- Each introduction should have a different angle/approach:
1. First: Problem-focused (highlight the challenge readers face)
2. Second: Benefit-focused (emphasize the value and outcomes)
3. Third: Story/statistic-focused (use a compelling fact or narrative hook)
- Maintain a professional yet engaging tone
- Avoid generic phrases - be specific and benefit-driven
Return ONLY a JSON array of exactly 3 introductions:
[
"First introduction (80-120 words, problem-focused)",
"Second introduction (80-120 words, benefit-focused)",
"Third introduction (80-120 words, story/statistic-focused)"
]"""

    def get_introduction_schema(self) -> Dict[str, Any]:
        """Get the JSON schema for introduction generation.

        Returns:
            A schema describing an array of exactly three strings. The string
            length bounds are in characters and are sized so that an
            80-120 word introduction fits.
        """
        return {
            "type": "array",
            "items": {
                "type": "string",
                "minLength": self._SCHEMA_MIN_CHARS,
                "maxLength": self._SCHEMA_MAX_CHARS
            },
            "minItems": self._NUM_INTRODUCTIONS,
            "maxItems": self._NUM_INTRODUCTIONS
        }

    @classmethod
    def _extract_introductions(cls, result: Any) -> List[Any]:
        """Unwrap the provider response into a list of candidates.

        Accepts either a bare list or a dict that wraps the list under one of
        the known keys. The first key holding a non-empty list wins, so an
        empty or ``None`` value under an earlier key no longer hides a valid
        list under a later one.
        """
        if isinstance(result, list):
            return result
        if isinstance(result, dict):
            for key in cls._RESPONSE_KEYS:
                candidate = result.get(key)
                if isinstance(candidate, list) and candidate:
                    return candidate
            return []
        logger.warning(f"Unexpected introduction generation result type: {type(result)}")
        return []

    @classmethod
    def _clean_introductions(cls, candidates: List[Any]) -> List[str]:
        """Strip candidates and keep only plausible introductions.

        A candidate is kept when it is a string of at least
        ``_MIN_INTRO_CHARS`` characters and at most ``_MAX_INTRO_WORDS``
        whitespace-separated words after stripping.
        """
        cleaned = []
        for candidate in candidates:
            if not isinstance(candidate, str):
                continue
            text = candidate.strip()
            if len(text) < cls._MIN_INTRO_CHARS:
                continue
            if len(text.split()) > cls._MAX_INTRO_WORDS:
                continue
            cleaned.append(text)
        return cleaned

    async def generate_introductions(
        self,
        blog_title: str,
        research: BlogResearchResponse,
        outline: List[BlogOutlineSection],
        sections_content: Dict[str, str],
        primary_keywords: List[str],
        search_intent: str,
        user_id: str
    ) -> List[str]:
        """Generate 3 varied blog introductions.

        Args:
            blog_title: The blog post title
            research: Research data with keywords and insights
            outline: Blog outline sections
            sections_content: Dictionary mapping section IDs to their content
            primary_keywords: Primary keywords for the blog
            search_intent: Search intent (informational, commercial, etc.)
            user_id: User ID for API calls

        Returns:
            List of 3 introduction options. When the LLM returns fewer than
            three usable introductions the list is padded with a generic
            placeholder; when generation fails entirely, three keyword-based
            fallback sentences are returned instead.

        Raises:
            ValueError: If ``user_id`` is empty.
        """
        # Imported inside the method, as in the original code.
        # NOTE(review): presumably avoids an import cycle or a heavy import at
        # module load - confirm before moving to module level.
        from services.llm_providers.main_text_generation import llm_text_gen

        if not user_id:
            raise ValueError("user_id is required for introduction generation")

        prompt = self.build_introduction_prompt(
            blog_title=blog_title,
            research=research,
            outline=outline,
            sections_content=sections_content,
            primary_keywords=primary_keywords,
            search_intent=search_intent
        )
        schema = self.get_introduction_schema()
        logger.info(f"Generating blog introductions for user {user_id}")

        try:
            # Generate introductions using structured JSON response.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=schema,
                system_prompt="You are an expert content writer specializing in creating compelling blog introductions that hook readers and clearly communicate value.",
                user_id=user_id
            )

            cleaned_introductions = self._clean_introductions(
                self._extract_introductions(result)
            )

            # Callers expect exactly three options; pad when the model
            # under-delivers.
            missing = self._NUM_INTRODUCTIONS - len(cleaned_introductions)
            if missing > 0:
                logger.warning(f"Generated only {len(cleaned_introductions)} introductions, expected 3")
                placeholder = f"{blog_title} - A comprehensive guide covering essential insights and practical strategies."
                cleaned_introductions.extend([placeholder] * missing)

            return cleaned_introductions[:self._NUM_INTRODUCTIONS]
        except Exception as e:
            # Deliberate best-effort boundary: any provider or parsing failure
            # degrades to simple keyword-based introductions.
            logger.error(f"Failed to generate introductions: {e}")
            keyword = primary_keywords[0] if primary_keywords else None
            return [
                f"In this comprehensive guide, we'll explore {keyword or 'essential insights'} and provide actionable strategies.",
                f"Discover everything you need to know about {keyword or 'this topic'} and how it can transform your approach.",
                f"Whether you're new to {keyword or 'this topic'} or looking to deepen your understanding, this guide has you covered."
            ]

View File

@@ -5,7 +5,6 @@ Constructs comprehensive prompts with research data, keywords, and strategic req
"""
from typing import Dict, Any, List
from loguru import logger
class PromptBuilder:
@@ -23,7 +22,18 @@ class PromptBuilder:
# Use the filtered research data (already cleaned by ResearchDataFilter)
research = request.research
return f"""Create a comprehensive blog outline for: {', '.join(primary_keywords)}
primary_kw_text = ', '.join(primary_keywords) if primary_keywords else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
long_tail_text = ', '.join(research.keyword_analysis.get('long_tail', [])) if research and research.keyword_analysis else "None discovered"
semantic_text = ', '.join(research.keyword_analysis.get('semantic_keywords', [])) if research and research.keyword_analysis else "None discovered"
trending_text = ', '.join(research.keyword_analysis.get('trending_terms', [])) if research and research.keyword_analysis else "None discovered"
content_gap_text = ', '.join(research.keyword_analysis.get('content_gaps', [])) if research and research.keyword_analysis else "None identified"
content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided; infer compelling angles from research insights."
competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
return f"""Create a comprehensive blog outline for: {primary_kw_text}
CONTEXT:
Search Intent: {search_intent}
@@ -32,19 +42,19 @@ Industry: {getattr(request.persona, 'industry', 'General') if request.persona el
Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
KEYWORDS:
Primary: {', '.join(primary_keywords)}
Secondary: {', '.join(secondary_keywords)}
Long-tail: {', '.join(research.keyword_analysis.get('long_tail', []))}
Semantic: {', '.join(research.keyword_analysis.get('semantic_keywords', []))}
Trending: {', '.join(research.keyword_analysis.get('trending_terms', []))}
Content Gaps: {', '.join(research.keyword_analysis.get('content_gaps', []))}
Primary: {primary_kw_text}
Secondary: {secondary_kw_text}
Long-tail: {long_tail_text}
Semantic: {semantic_text}
Trending: {trending_text}
Content Gaps: {content_gap_text}
CONTENT ANGLES: {', '.join(content_angles)}
CONTENT ANGLES / STORYLINES: {content_angle_text}
COMPETITIVE INTELLIGENCE:
Top Competitors: {', '.join(research.competitor_analysis.get('top_competitors', []))}
Market Opportunities: {', '.join(research.competitor_analysis.get('opportunities', []))}
Competitive Advantages: {', '.join(research.competitor_analysis.get('competitive_advantages', []))}
Top Competitors: {competitor_text}
Market Opportunities: {opportunity_text}
Competitive Advantages: {advantages_text}
RESEARCH SOURCES: {len(sources)} authoritative sources available
@@ -52,6 +62,7 @@ RESEARCH SOURCES: {len(sources)} authoritative sources available
STRATEGIC REQUIREMENTS:
- Create SEO-optimized headings with natural keyword integration
- Surface the strongest research-backed angles within the outline
- Build logical narrative flow from problem to solution
- Include data-driven insights from research sources
- Address content gaps and market opportunities
@@ -59,23 +70,34 @@ STRATEGIC REQUIREMENTS:
- Ensure engaging, actionable content throughout
Return JSON format:
{{
"outline": [
{{
"heading": "Section heading with primary keyword",
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
{
"title_options": [
"Title option 1",
"Title option 2",
"Title option 3"
],
"outline": [
{
"heading": "Section heading with primary keyword",
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
"target_words": 300,
"keywords": ["primary keyword", "secondary keyword"]
}}
]
}}"""
"keywords": ["primary keyword", "secondary keyword"]
}
]
}"""
def get_outline_schema(self) -> Dict[str, Any]:
"""Get the structured JSON schema for outline generation."""
return {
"type": "object",
"properties": {
"title_options": {
"type": "array",
"items": {
"type": "string"
}
},
"outline": {
"type": "array",
"items": {
@@ -100,6 +122,6 @@ Return JSON format:
}
}
},
"required": ["outline"],
"propertyOrdering": ["outline"]
"required": ["title_options", "outline"],
"propertyOrdering": ["title_options", "outline"]
}

View File

@@ -0,0 +1,198 @@
"""
SEO Title Generator - Specialized service for generating SEO-optimized blog titles.
Generates 5 premium SEO-optimized titles using research data and outline context.
"""
from typing import Dict, Any, List
from loguru import logger
from models.blog_models import BlogResearchResponse, BlogOutlineSection
class SEOTitleGenerator:
    """Generates SEO-optimized blog titles using research and outline data.

    The generator builds a prompt from keywords, research insights and the
    outline headings, asks the LLM for exactly five titles, then validates
    the answer and pads it so callers always receive five options.
    """

    # Number of title options always returned to the caller.
    _NUM_TITLES = 5
    # Accepted title length in characters. The prompt asks for 50-65; the
    # validator is intentionally a little more lenient on both sides.
    _MIN_TITLE_CHARS = 30
    _MAX_TITLE_CHARS = 70
    # Keys under which the provider may wrap the array, in priority order.
    _RESPONSE_KEYS = ('titles', 'title_options', 'options', 'response')

    def __init__(self):
        """Initialize the SEO title generator (no state is kept)."""
        pass

    @staticmethod
    def _join_or(values: Any, default: str) -> str:
        """Join ``values`` with ", " or return ``default`` when there is nothing to show.

        Missing keys, ``None`` and empty collections all map to ``default``
        so the prompt never contains an empty field, and a ``None`` value no
        longer raises inside ``str.join``.
        """
        if isinstance(values, str):
            return values or default
        if not isinstance(values, (list, tuple)) or not values:
            return default
        return ', '.join(str(value) for value in values)

    def build_title_prompt(
        self,
        research: BlogResearchResponse,
        outline: List[BlogOutlineSection],
        primary_keywords: List[str],
        secondary_keywords: List[str],
        content_angles: List[str],
        search_intent: str,
        word_count: int = 1500
    ) -> str:
        """Build a specialized prompt for SEO title generation.

        Args:
            research: Research data; ``keyword_analysis`` and
                ``competitor_analysis`` are read.
            outline: Blog outline sections; the first five are summarized.
            primary_keywords: Primary keywords for the blog.
            secondary_keywords: Secondary keywords.
            content_angles: Content angles from research.
            search_intent: Search intent label inserted verbatim.
            word_count: Accepted for interface compatibility; it is not
                currently referenced in the prompt text.

        Returns:
            The complete prompt text.
        """
        keyword_analysis = research.keyword_analysis or {}
        competitor_analysis = research.competitor_analysis or {}

        primary_kw_text = self._join_or(primary_keywords, "the target topic")
        secondary_kw_text = self._join_or(secondary_keywords, "None provided")
        long_tail_text = self._join_or(keyword_analysis.get('long_tail'), "None discovered")
        semantic_text = self._join_or(keyword_analysis.get('semantic_keywords'), "None discovered")
        trending_text = self._join_or(keyword_analysis.get('trending_terms'), "None discovered")
        content_gap_text = self._join_or(keyword_analysis.get('content_gaps'), "None identified")
        content_angle_text = self._join_or(content_angles, "No explicit angles provided")
        competitor_text = self._join_or(competitor_analysis.get('top_competitors'), 'Not available')
        opportunity_text = self._join_or(competitor_analysis.get('opportunities'), 'Not available')

        # Summarize the outline: heading plus up to three subheadings for
        # each of the first five sections.
        outline_summary = []
        for i, section in enumerate((outline or [])[:5], 1):
            outline_summary.append(f"{i}. {section.heading}")
            if section.subheadings:
                outline_summary.append(f" Subtopics: {', '.join(section.subheadings[:3])}")
        outline_text = '\n'.join(outline_summary) if outline_summary else "No outline available"

        return f"""Generate exactly 5 SEO-optimized blog titles for: {primary_kw_text}
RESEARCH CONTEXT:
Primary Keywords: {primary_kw_text}
Secondary Keywords: {secondary_kw_text}
Long-tail Keywords: {long_tail_text}
Semantic Keywords: {semantic_text}
Trending Terms: {trending_text}
Content Gaps: {content_gap_text}
Search Intent: {search_intent}
Content Angles: {content_angle_text}
OUTLINE STRUCTURE:
{outline_text}
COMPETITIVE INTELLIGENCE:
Top Competitors: {competitor_text}
Market Opportunities: {opportunity_text}
SEO REQUIREMENTS:
- Each title must be 50-65 characters (optimal for search engine display)
- Include the primary keyword within the first 55 characters
- Highlight a unique value proposition from the research angles
- Use power words that drive clicks (e.g., "Ultimate", "Complete", "Essential", "Proven")
- Avoid generic phrasing - be specific and benefit-focused
- Target the search intent: {search_intent}
- Ensure titles are compelling and click-worthy
Return ONLY a JSON array of exactly 5 titles:
[
"Title 1 (50-65 chars)",
"Title 2 (50-65 chars)",
"Title 3 (50-65 chars)",
"Title 4 (50-65 chars)",
"Title 5 (50-65 chars)"
]"""

    def get_title_schema(self) -> Dict[str, Any]:
        """Get the JSON schema for title generation.

        Returns:
            A schema describing an array of exactly five strings, each
            50-65 characters long.
        """
        return {
            "type": "array",
            "items": {
                "type": "string",
                "minLength": 50,
                "maxLength": 65
            },
            "minItems": self._NUM_TITLES,
            "maxItems": self._NUM_TITLES
        }

    @classmethod
    def _extract_titles(cls, result: Any) -> List[Any]:
        """Unwrap the provider response into a list of candidates.

        Accepts either a bare list or a dict that wraps the list under one of
        the known keys. The first key holding a non-empty list wins, so an
        empty or ``None`` value under an earlier key no longer hides a valid
        list under a later one.
        """
        if isinstance(result, list):
            return result
        if isinstance(result, dict):
            for key in cls._RESPONSE_KEYS:
                candidate = result.get(key)
                if isinstance(candidate, list) and candidate:
                    return candidate
            return []
        logger.warning(f"Unexpected title generation result type: {type(result)}")
        return []

    @classmethod
    def _clean_titles(cls, candidates: List[Any]) -> List[str]:
        """Strip candidates and keep strings within the accepted length range."""
        cleaned = []
        for candidate in candidates:
            if not isinstance(candidate, str):
                continue
            text = candidate.strip()
            if cls._MIN_TITLE_CHARS <= len(text) <= cls._MAX_TITLE_CHARS:
                cleaned.append(text)
        return cleaned

    async def generate_seo_titles(
        self,
        research: BlogResearchResponse,
        outline: List[BlogOutlineSection],
        primary_keywords: List[str],
        secondary_keywords: List[str],
        content_angles: List[str],
        search_intent: str,
        word_count: int,
        user_id: str
    ) -> List[str]:
        """Generate SEO-optimized titles using research and outline data.

        Args:
            research: Research data with keywords and insights
            outline: Blog outline sections
            primary_keywords: Primary keywords for the blog
            secondary_keywords: Secondary keywords
            content_angles: Content angles from research
            search_intent: Search intent (informational, commercial, etc.)
            word_count: Target word count (forwarded to the prompt builder)
            user_id: User ID for API calls

        Returns:
            List of 5 SEO-optimized titles. When the LLM returns fewer than
            five usable titles the list is padded with a keyword-based
            placeholder; when generation fails entirely, five numbered
            fallback titles are returned instead.

        Raises:
            ValueError: If ``user_id`` is empty.
        """
        # Imported inside the method, as in the original code.
        # NOTE(review): presumably avoids an import cycle or a heavy import at
        # module load - confirm before moving to module level.
        from services.llm_providers.main_text_generation import llm_text_gen

        if not user_id:
            raise ValueError("user_id is required for title generation")

        prompt = self.build_title_prompt(
            research=research,
            outline=outline,
            primary_keywords=primary_keywords,
            secondary_keywords=secondary_keywords,
            content_angles=content_angles,
            search_intent=search_intent,
            word_count=word_count
        )
        schema = self.get_title_schema()
        logger.info(f"Generating SEO-optimized titles for user {user_id}")

        try:
            # Generate titles using structured JSON response.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=schema,
                system_prompt="You are an expert SEO content strategist specializing in creating compelling, search-optimized blog titles.",
                user_id=user_id
            )

            cleaned_titles = self._clean_titles(self._extract_titles(result))

            # Callers expect exactly five options; pad when the model
            # under-delivers.
            missing = self._NUM_TITLES - len(cleaned_titles)
            if missing > 0:
                logger.warning(f"Generated only {len(cleaned_titles)} titles, expected 5")
                placeholder = f"{primary_keywords[0] if primary_keywords else 'Blog'} - Comprehensive Guide"
                cleaned_titles.extend([placeholder] * missing)

            return cleaned_titles[:self._NUM_TITLES]
        except Exception as e:
            # Deliberate best-effort boundary: any provider or parsing failure
            # degrades to simple keyword-based titles.
            logger.error(f"Failed to generate SEO titles: {e}")
            primary = primary_keywords[0] if primary_keywords else "Blog Post"
            return [f"{primary}: Complete Guide {i + 1}" for i in range(self._NUM_TITLES)]

View File

@@ -74,7 +74,9 @@ class ResearchService:
if cached_result:
logger.info(f"Returning cached research result for keywords: {request.keywords}")
blog_writer_logger.log_operation_end("research", 0, success=True, cache_hit=True)
return BlogResearchResponse(**cached_result)
# Normalize cached data to fix None values in confidence_scores
normalized_result = self._normalize_cached_research_data(cached_result)
return BlogResearchResponse(**normalized_result)
# User ID validation (validation logic is now in Google Grounding provider)
if not user_id:
@@ -421,7 +423,9 @@ class ResearchService:
if cached_result:
await task_manager.update_progress(task_id, "✅ Found cached research results! Returning instantly...")
logger.info(f"Returning cached research result for keywords: {request.keywords}")
return BlogResearchResponse(**cached_result)
# Normalize cached data to fix None values in confidence_scores
normalized_result = self._normalize_cached_research_data(cached_result)
return BlogResearchResponse(**normalized_result)
# User ID validation
if not user_id:
@@ -759,6 +763,49 @@ class ResearchService:
return sources
def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Normalize cached research data to fix None values in confidence_scores.
Ensures all GroundingSupport objects have confidence_scores as a list.
"""
if not isinstance(cached_data, dict):
return cached_data
normalized = cached_data.copy()
# Normalize grounding_metadata if present
if "grounding_metadata" in normalized and normalized["grounding_metadata"]:
grounding_metadata = normalized["grounding_metadata"].copy() if isinstance(normalized["grounding_metadata"], dict) else {}
# Normalize grounding_supports
if "grounding_supports" in grounding_metadata and isinstance(grounding_metadata["grounding_supports"], list):
normalized_supports = []
for support in grounding_metadata["grounding_supports"]:
if isinstance(support, dict):
normalized_support = support.copy()
# Fix confidence_scores: ensure it's a list, not None
if normalized_support.get("confidence_scores") is None:
normalized_support["confidence_scores"] = []
elif not isinstance(normalized_support.get("confidence_scores"), list):
# If it's not a list, try to convert or default to empty list
normalized_support["confidence_scores"] = []
# Fix grounding_chunk_indices: ensure it's a list, not None
if normalized_support.get("grounding_chunk_indices") is None:
normalized_support["grounding_chunk_indices"] = []
elif not isinstance(normalized_support.get("grounding_chunk_indices"), list):
normalized_support["grounding_chunk_indices"] = []
# Ensure segment_text is a string
if normalized_support.get("segment_text") is None:
normalized_support["segment_text"] = ""
normalized_supports.append(normalized_support)
else:
normalized_supports.append(support)
grounding_metadata["grounding_supports"] = normalized_supports
normalized["grounding_metadata"] = grounding_metadata
return normalized
def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> GroundingMetadata:
"""Extract detailed grounding metadata from Gemini result."""
grounding_chunks = []