story writer backend migration complete, Blog writer SEO and story writer backend migration complete, Blog writer SEO and story writer frontend migration complete
This commit is contained in:
186
backend/services/blog_writer/content/introduction_generator.py
Normal file
186
backend/services/blog_writer/content/introduction_generator.py
Normal file
@@ -0,0 +1,186 @@
|
||||
"""
|
||||
Introduction Generator - Generates varied blog introductions based on content and research.
|
||||
|
||||
Generates 3 different introduction options for the user to choose from.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
|
||||
from models.blog_models import BlogResearchResponse, BlogOutlineSection
|
||||
|
||||
|
||||
class IntroductionGenerator:
    """Generates blog introductions using research and content data.

    The prompt asks the model for three introductions of 80-120 *words*.
    Validation therefore measures the upper bound in words; measuring it in
    characters (as an earlier revision did) rejects every compliant answer,
    because an 80-word paragraph is several hundred characters long.
    """

    # Number of introduction options returned to the caller.
    INTRO_COUNT = 3
    # Lower bound (characters) below which a candidate is treated as noise.
    MIN_INTRO_CHARS = 50
    # Upper bound (words); generous headroom over the 120 words requested.
    MAX_INTRO_WORDS = 200

    def __init__(self):
        """Initialize the introduction generator."""
        pass

    def build_introduction_prompt(
        self,
        blog_title: str,
        research: "BlogResearchResponse",
        outline: "List[BlogOutlineSection]",
        sections_content: Dict[str, str],
        primary_keywords: List[str],
        search_intent: str
    ) -> str:
        """Build a prompt for generating blog introductions.

        Args:
            blog_title: The blog post title.
            research: Research data; only ``suggested_angles`` is read here.
            outline: Blog outline sections; the first three are summarised.
            sections_content: Dictionary mapping section IDs to their content.
            primary_keywords: Primary keywords for the blog.
            search_intent: Search intent (informational, commercial, etc.).

        Returns:
            The full prompt text to send to the model.
        """
        content_angles = research.suggested_angles or []

        # Summarise the first few sections that already have content so the
        # model knows what the introduction is leading into.
        section_summaries = []
        for i, section in enumerate(outline[:3], 1):
            content = sections_content.get(section.id, '')
            if not content:
                continue
            # Take first 200 chars as summary
            summary = content[:200] + '...' if len(content) > 200 else content
            section_summaries.append(f"{i}. {section.heading}: {summary}")

        sections_text = '\n'.join(section_summaries) if section_summaries else "Content sections are being generated."
        primary_kw_text = ', '.join(primary_keywords) if primary_keywords else "the topic"
        content_angle_text = ', '.join(content_angles[:3]) if content_angles else "General insights"

        return f"""Generate exactly 3 varied blog introductions for the following blog post.

BLOG TITLE: {blog_title}

PRIMARY KEYWORDS: {primary_kw_text}
SEARCH INTENT: {search_intent}
CONTENT ANGLES: {content_angle_text}

BLOG CONTENT SUMMARY:
{sections_text}

REQUIREMENTS FOR EACH INTRODUCTION:
- 80-120 words in length
- Hook the reader immediately with a compelling opening
- Clearly state the value proposition and what readers will learn
- Include the primary keyword naturally within the first 2 sentences
- Each introduction should have a different angle/approach:
1. First: Problem-focused (highlight the challenge readers face)
2. Second: Benefit-focused (emphasize the value and outcomes)
3. Third: Story/statistic-focused (use a compelling fact or narrative hook)
- Maintain a professional yet engaging tone
- Avoid generic phrases - be specific and benefit-driven

Return ONLY a JSON array of exactly 3 introductions:
[
"First introduction (80-120 words, problem-focused)",
"Second introduction (80-120 words, benefit-focused)",
"Third introduction (80-120 words, story/statistic-focused)"
]"""

    def get_introduction_schema(self) -> Dict[str, Any]:
        """Get the JSON schema for introduction generation.

        ``minLength`` / ``maxLength`` count characters, not words, so the
        bounds are sized to comfortably contain an 80-120 word paragraph.
        """
        return {
            "type": "array",
            "items": {
                "type": "string",
                "minLength": 300,
                "maxLength": 1200
            },
            "minItems": 3,
            "maxItems": 3
        }

    @staticmethod
    def _extract_candidates(result: Any) -> List[Any]:
        """Pull the list of candidate introductions out of a model result.

        The result may be the array itself or a dict wrapping it under one of
        several keys; the first key holding a non-empty list wins.
        """
        if isinstance(result, list):
            return result
        if isinstance(result, dict):
            for key in ('introductions', 'options', 'intros', 'response'):
                value = result.get(key)
                if isinstance(value, list) and value:
                    return value
            return []
        logger.warning(f"Unexpected introduction generation result type: {type(result)}")
        return []

    @staticmethod
    def _select_valid_introductions(candidates: List[Any]) -> List[str]:
        """Return the stripped candidates that look like real introductions.

        A candidate is kept when it is a string of at least
        ``MIN_INTRO_CHARS`` characters and at most ``MAX_INTRO_WORDS`` words.
        """
        valid = []
        for candidate in candidates:
            if not isinstance(candidate, str):
                continue
            text = candidate.strip()
            if len(text) < IntroductionGenerator.MIN_INTRO_CHARS:
                continue
            # Word-based ceiling: the prompt specifies length in words.
            if len(text.split()) <= IntroductionGenerator.MAX_INTRO_WORDS:
                valid.append(text)
        return valid

    async def generate_introductions(
        self,
        blog_title: str,
        research: "BlogResearchResponse",
        outline: "List[BlogOutlineSection]",
        sections_content: Dict[str, str],
        primary_keywords: List[str],
        search_intent: str,
        user_id: str
    ) -> List[str]:
        """Generate 3 varied blog introductions.

        Args:
            blog_title: The blog post title
            research: Research data with keywords and insights
            outline: Blog outline sections
            sections_content: Dictionary mapping section IDs to their content
            primary_keywords: Primary keywords for the blog
            search_intent: Search intent (informational, commercial, etc.)
            user_id: User ID for API calls

        Returns:
            List of 3 introduction options. When the model call or parsing
            fails, three keyword-based fallback sentences are returned
            instead of raising.

        Raises:
            ValueError: If ``user_id`` is empty.
        """
        from services.llm_providers.main_text_generation import llm_text_gen

        if not user_id:
            raise ValueError("user_id is required for introduction generation")

        prompt = self.build_introduction_prompt(
            blog_title=blog_title,
            research=research,
            outline=outline,
            sections_content=sections_content,
            primary_keywords=primary_keywords,
            search_intent=search_intent
        )
        schema = self.get_introduction_schema()

        logger.info(f"Generating blog introductions for user {user_id}")

        try:
            # NOTE(review): llm_text_gen is called without await; if it does
            # blocking I/O it will stall the event loop - confirm with owner.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=schema,
                system_prompt="You are an expert content writer specializing in creating compelling blog introductions that hook readers and clearly communicate value.",
                user_id=user_id
            )

            introductions = self._select_valid_introductions(self._extract_candidates(result))

            # Always hand back exactly INTRO_COUNT options: pad when short.
            missing = self.INTRO_COUNT - len(introductions)
            if missing > 0:
                logger.warning(f"Generated only {len(introductions)} introductions, expected 3")
                placeholder = f"{blog_title} - A comprehensive guide covering essential insights and practical strategies."
                introductions.extend([placeholder] * missing)

            return introductions[:self.INTRO_COUNT]

        except Exception as e:
            # Deliberate best-effort: the caller still gets usable options.
            logger.error(f"Failed to generate introductions: {e}")
            fallback_introductions = [
                f"In this comprehensive guide, we'll explore {primary_keywords[0] if primary_keywords else 'essential insights'} and provide actionable strategies.",
                f"Discover everything you need to know about {primary_keywords[0] if primary_keywords else 'this topic'} and how it can transform your approach.",
                f"Whether you're new to {primary_keywords[0] if primary_keywords else 'this topic'} or looking to deepen your understanding, this guide has you covered."
            ]
            return fallback_introductions
|
||||
|
||||
@@ -5,7 +5,6 @@ Constructs comprehensive prompts with research data, keywords, and strategic req
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class PromptBuilder:
|
||||
@@ -23,7 +22,18 @@ class PromptBuilder:
|
||||
# Use the filtered research data (already cleaned by ResearchDataFilter)
|
||||
research = request.research
|
||||
|
||||
return f"""Create a comprehensive blog outline for: {', '.join(primary_keywords)}
|
||||
primary_kw_text = ', '.join(primary_keywords) if primary_keywords else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
|
||||
secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
|
||||
long_tail_text = ', '.join(research.keyword_analysis.get('long_tail', [])) if research and research.keyword_analysis else "None discovered"
|
||||
semantic_text = ', '.join(research.keyword_analysis.get('semantic_keywords', [])) if research and research.keyword_analysis else "None discovered"
|
||||
trending_text = ', '.join(research.keyword_analysis.get('trending_terms', [])) if research and research.keyword_analysis else "None discovered"
|
||||
content_gap_text = ', '.join(research.keyword_analysis.get('content_gaps', [])) if research and research.keyword_analysis else "None identified"
|
||||
content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided; infer compelling angles from research insights."
|
||||
competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
|
||||
opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
|
||||
advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
|
||||
|
||||
return f"""Create a comprehensive blog outline for: {primary_kw_text}
|
||||
|
||||
CONTEXT:
|
||||
Search Intent: {search_intent}
|
||||
@@ -32,19 +42,19 @@ Industry: {getattr(request.persona, 'industry', 'General') if request.persona el
|
||||
Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
|
||||
|
||||
KEYWORDS:
|
||||
Primary: {', '.join(primary_keywords)}
|
||||
Secondary: {', '.join(secondary_keywords)}
|
||||
Long-tail: {', '.join(research.keyword_analysis.get('long_tail', []))}
|
||||
Semantic: {', '.join(research.keyword_analysis.get('semantic_keywords', []))}
|
||||
Trending: {', '.join(research.keyword_analysis.get('trending_terms', []))}
|
||||
Content Gaps: {', '.join(research.keyword_analysis.get('content_gaps', []))}
|
||||
Primary: {primary_kw_text}
|
||||
Secondary: {secondary_kw_text}
|
||||
Long-tail: {long_tail_text}
|
||||
Semantic: {semantic_text}
|
||||
Trending: {trending_text}
|
||||
Content Gaps: {content_gap_text}
|
||||
|
||||
CONTENT ANGLES: {', '.join(content_angles)}
|
||||
CONTENT ANGLES / STORYLINES: {content_angle_text}
|
||||
|
||||
COMPETITIVE INTELLIGENCE:
|
||||
Top Competitors: {', '.join(research.competitor_analysis.get('top_competitors', []))}
|
||||
Market Opportunities: {', '.join(research.competitor_analysis.get('opportunities', []))}
|
||||
Competitive Advantages: {', '.join(research.competitor_analysis.get('competitive_advantages', []))}
|
||||
Top Competitors: {competitor_text}
|
||||
Market Opportunities: {opportunity_text}
|
||||
Competitive Advantages: {advantages_text}
|
||||
|
||||
RESEARCH SOURCES: {len(sources)} authoritative sources available
|
||||
|
||||
@@ -52,6 +62,7 @@ RESEARCH SOURCES: {len(sources)} authoritative sources available
|
||||
|
||||
STRATEGIC REQUIREMENTS:
|
||||
- Create SEO-optimized headings with natural keyword integration
|
||||
- Surface the strongest research-backed angles within the outline
|
||||
- Build logical narrative flow from problem to solution
|
||||
- Include data-driven insights from research sources
|
||||
- Address content gaps and market opportunities
|
||||
@@ -59,23 +70,34 @@ STRATEGIC REQUIREMENTS:
|
||||
- Ensure engaging, actionable content throughout
|
||||
|
||||
Return JSON format:
|
||||
{{
|
||||
"outline": [
|
||||
{{
|
||||
"heading": "Section heading with primary keyword",
|
||||
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
|
||||
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
|
||||
{
|
||||
"title_options": [
|
||||
"Title option 1",
|
||||
"Title option 2",
|
||||
"Title option 3"
|
||||
],
|
||||
"outline": [
|
||||
{
|
||||
"heading": "Section heading with primary keyword",
|
||||
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
|
||||
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
|
||||
"target_words": 300,
|
||||
"keywords": ["primary keyword", "secondary keyword"]
|
||||
}}
|
||||
]
|
||||
}}"""
|
||||
"keywords": ["primary keyword", "secondary keyword"]
|
||||
}
|
||||
]
|
||||
}"""
|
||||
|
||||
def get_outline_schema(self) -> Dict[str, Any]:
|
||||
"""Get the structured JSON schema for outline generation."""
|
||||
return {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title_options": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"outline": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@@ -100,6 +122,6 @@ Return JSON format:
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["outline"],
|
||||
"propertyOrdering": ["outline"]
|
||||
"required": ["title_options", "outline"],
|
||||
"propertyOrdering": ["title_options", "outline"]
|
||||
}
|
||||
|
||||
198
backend/services/blog_writer/outline/seo_title_generator.py
Normal file
198
backend/services/blog_writer/outline/seo_title_generator.py
Normal file
@@ -0,0 +1,198 @@
|
||||
"""
|
||||
SEO Title Generator - Specialized service for generating SEO-optimized blog titles.
|
||||
|
||||
Generates 5 premium SEO-optimized titles using research data and outline context.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from loguru import logger
|
||||
|
||||
from models.blog_models import BlogResearchResponse, BlogOutlineSection
|
||||
|
||||
|
||||
class SEOTitleGenerator:
    """Generates SEO-optimized blog titles using research and outline data."""

    # Number of title options returned to the caller.
    TITLE_COUNT = 5
    # Accepted title length window in characters (slightly wider than the
    # 50-65 requested in the prompt, to keep good near-misses).
    MIN_TITLE_CHARS = 30
    MAX_TITLE_CHARS = 70

    def __init__(self):
        """Initialize the SEO title generator."""
        pass

    @staticmethod
    def _join_or(values: Any, fallback: str) -> str:
        """Join ``values`` with ', ' or return ``fallback`` when there are none.

        Handles a missing key, ``None`` and an empty list uniformly so that a
        prompt field is never rendered blank.
        """
        if not values:
            return fallback
        if isinstance(values, str):
            return values
        return ', '.join(str(value) for value in values)

    def build_title_prompt(
        self,
        research: "BlogResearchResponse",
        outline: "List[BlogOutlineSection]",
        primary_keywords: List[str],
        secondary_keywords: List[str],
        content_angles: List[str],
        search_intent: str,
        word_count: int = 1500
    ) -> str:
        """Build a specialized prompt for SEO title generation.

        Args:
            research: Research data; ``keyword_analysis`` and
                ``competitor_analysis`` are read (either may be empty/None).
            outline: Blog outline sections; the first five are summarised.
            primary_keywords: Primary keywords for the blog.
            secondary_keywords: Secondary keywords.
            content_angles: Content angles from research.
            search_intent: Search intent (informational, commercial, etc.).
            word_count: Accepted for interface compatibility; not referenced
                in the prompt text.

        Returns:
            The full prompt text to send to the model.
        """
        keyword_analysis = research.keyword_analysis or {}
        competitor_analysis = research.competitor_analysis or {}
        join_or = self._join_or

        primary_kw_text = join_or(primary_keywords, "the target topic")
        secondary_kw_text = join_or(secondary_keywords, "None provided")
        long_tail_text = join_or(keyword_analysis.get('long_tail'), "None discovered")
        semantic_text = join_or(keyword_analysis.get('semantic_keywords'), "None discovered")
        trending_text = join_or(keyword_analysis.get('trending_terms'), "None discovered")
        content_gap_text = join_or(keyword_analysis.get('content_gaps'), "None identified")
        content_angle_text = join_or(content_angles, "No explicit angles provided")
        competitor_text = join_or(competitor_analysis.get('top_competitors'), 'Not available')
        opportunity_text = join_or(competitor_analysis.get('opportunities'), 'Not available')

        # Summarise the outline: first 5 sections, up to 3 subheadings each.
        outline_summary = []
        for i, section in enumerate(outline[:5], 1):
            outline_summary.append(f"{i}. {section.heading}")
            if section.subheadings:
                outline_summary.append(f" Subtopics: {', '.join(section.subheadings[:3])}")

        outline_text = '\n'.join(outline_summary) if outline_summary else "No outline available"

        return f"""Generate exactly 5 SEO-optimized blog titles for: {primary_kw_text}

RESEARCH CONTEXT:
Primary Keywords: {primary_kw_text}
Secondary Keywords: {secondary_kw_text}
Long-tail Keywords: {long_tail_text}
Semantic Keywords: {semantic_text}
Trending Terms: {trending_text}
Content Gaps: {content_gap_text}
Search Intent: {search_intent}
Content Angles: {content_angle_text}

OUTLINE STRUCTURE:
{outline_text}

COMPETITIVE INTELLIGENCE:
Top Competitors: {competitor_text}
Market Opportunities: {opportunity_text}

SEO REQUIREMENTS:
- Each title must be 50-65 characters (optimal for search engine display)
- Include the primary keyword within the first 55 characters
- Highlight a unique value proposition from the research angles
- Use power words that drive clicks (e.g., "Ultimate", "Complete", "Essential", "Proven")
- Avoid generic phrasing - be specific and benefit-focused
- Target the search intent: {search_intent}
- Ensure titles are compelling and click-worthy

Return ONLY a JSON array of exactly 5 titles:
[
"Title 1 (50-65 chars)",
"Title 2 (50-65 chars)",
"Title 3 (50-65 chars)",
"Title 4 (50-65 chars)",
"Title 5 (50-65 chars)"
]"""

    def get_title_schema(self) -> Dict[str, Any]:
        """Get the JSON schema for title generation."""
        return {
            "type": "array",
            "items": {
                "type": "string",
                "minLength": 50,
                "maxLength": 65
            },
            "minItems": 5,
            "maxItems": 5
        }

    @staticmethod
    def _extract_candidates(result: Any) -> List[Any]:
        """Pull the list of candidate titles out of a model result.

        The result may be the array itself or a dict wrapping it under one of
        several keys; the first key holding a non-empty list wins.
        """
        if isinstance(result, list):
            return result
        if isinstance(result, dict):
            for key in ('titles', 'title_options', 'options', 'response'):
                value = result.get(key)
                if isinstance(value, list) and value:
                    return value
            return []
        logger.warning(f"Unexpected title generation result type: {type(result)}")
        return []

    @staticmethod
    def _select_valid_titles(candidates: List[Any]) -> List[str]:
        """Return stripped string candidates within the accepted length window."""
        low = SEOTitleGenerator.MIN_TITLE_CHARS
        high = SEOTitleGenerator.MAX_TITLE_CHARS
        return [
            candidate.strip()
            for candidate in candidates
            if isinstance(candidate, str) and low <= len(candidate.strip()) <= high
        ]

    async def generate_seo_titles(
        self,
        research: "BlogResearchResponse",
        outline: "List[BlogOutlineSection]",
        primary_keywords: List[str],
        secondary_keywords: List[str],
        content_angles: List[str],
        search_intent: str,
        word_count: int,
        user_id: str
    ) -> List[str]:
        """Generate SEO-optimized titles using research and outline data.

        Args:
            research: Research data with keywords and insights
            outline: Blog outline sections
            primary_keywords: Primary keywords for the blog
            secondary_keywords: Secondary keywords
            content_angles: Content angles from research
            search_intent: Search intent (informational, commercial, etc.)
            word_count: Target word count
            user_id: User ID for API calls

        Returns:
            List of 5 SEO-optimized titles. When the model call or parsing
            fails, five keyword-based fallback titles are returned instead of
            raising.

        Raises:
            ValueError: If ``user_id`` is empty.
        """
        from services.llm_providers.main_text_generation import llm_text_gen

        if not user_id:
            raise ValueError("user_id is required for title generation")

        prompt = self.build_title_prompt(
            research=research,
            outline=outline,
            primary_keywords=primary_keywords,
            secondary_keywords=secondary_keywords,
            content_angles=content_angles,
            search_intent=search_intent,
            word_count=word_count
        )
        schema = self.get_title_schema()

        logger.info(f"Generating SEO-optimized titles for user {user_id}")

        try:
            # NOTE(review): llm_text_gen is called without await; if it does
            # blocking I/O it will stall the event loop - confirm with owner.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=schema,
                system_prompt="You are an expert SEO content strategist specializing in creating compelling, search-optimized blog titles.",
                user_id=user_id
            )

            titles = self._select_valid_titles(self._extract_candidates(result))

            # Always hand back exactly TITLE_COUNT options: pad when short.
            missing = self.TITLE_COUNT - len(titles)
            if missing > 0:
                logger.warning(f"Generated only {len(titles)} titles, expected 5")
                placeholder = f"{primary_keywords[0] if primary_keywords else 'Blog'} - Comprehensive Guide"
                titles.extend([placeholder] * missing)

            return titles[:self.TITLE_COUNT]

        except Exception as e:
            # Deliberate best-effort: the caller still gets usable options.
            logger.error(f"Failed to generate SEO titles: {e}")
            primary = primary_keywords[0] if primary_keywords else "Blog Post"
            return [f"{primary}: Complete Guide {i+1}" for i in range(5)]
|
||||
|
||||
@@ -74,7 +74,9 @@ class ResearchService:
|
||||
if cached_result:
|
||||
logger.info(f"Returning cached research result for keywords: {request.keywords}")
|
||||
blog_writer_logger.log_operation_end("research", 0, success=True, cache_hit=True)
|
||||
return BlogResearchResponse(**cached_result)
|
||||
# Normalize cached data to fix None values in confidence_scores
|
||||
normalized_result = self._normalize_cached_research_data(cached_result)
|
||||
return BlogResearchResponse(**normalized_result)
|
||||
|
||||
# User ID validation (validation logic is now in Google Grounding provider)
|
||||
if not user_id:
|
||||
@@ -421,7 +423,9 @@ class ResearchService:
|
||||
if cached_result:
|
||||
await task_manager.update_progress(task_id, "✅ Found cached research results! Returning instantly...")
|
||||
logger.info(f"Returning cached research result for keywords: {request.keywords}")
|
||||
return BlogResearchResponse(**cached_result)
|
||||
# Normalize cached data to fix None values in confidence_scores
|
||||
normalized_result = self._normalize_cached_research_data(cached_result)
|
||||
return BlogResearchResponse(**normalized_result)
|
||||
|
||||
# User ID validation
|
||||
if not user_id:
|
||||
@@ -759,6 +763,49 @@ class ResearchService:
|
||||
|
||||
return sources
|
||||
|
||||
def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Normalize cached research data to fix None values in confidence_scores.
|
||||
Ensures all GroundingSupport objects have confidence_scores as a list.
|
||||
"""
|
||||
if not isinstance(cached_data, dict):
|
||||
return cached_data
|
||||
|
||||
normalized = cached_data.copy()
|
||||
|
||||
# Normalize grounding_metadata if present
|
||||
if "grounding_metadata" in normalized and normalized["grounding_metadata"]:
|
||||
grounding_metadata = normalized["grounding_metadata"].copy() if isinstance(normalized["grounding_metadata"], dict) else {}
|
||||
|
||||
# Normalize grounding_supports
|
||||
if "grounding_supports" in grounding_metadata and isinstance(grounding_metadata["grounding_supports"], list):
|
||||
normalized_supports = []
|
||||
for support in grounding_metadata["grounding_supports"]:
|
||||
if isinstance(support, dict):
|
||||
normalized_support = support.copy()
|
||||
# Fix confidence_scores: ensure it's a list, not None
|
||||
if normalized_support.get("confidence_scores") is None:
|
||||
normalized_support["confidence_scores"] = []
|
||||
elif not isinstance(normalized_support.get("confidence_scores"), list):
|
||||
# If it's not a list, try to convert or default to empty list
|
||||
normalized_support["confidence_scores"] = []
|
||||
# Fix grounding_chunk_indices: ensure it's a list, not None
|
||||
if normalized_support.get("grounding_chunk_indices") is None:
|
||||
normalized_support["grounding_chunk_indices"] = []
|
||||
elif not isinstance(normalized_support.get("grounding_chunk_indices"), list):
|
||||
normalized_support["grounding_chunk_indices"] = []
|
||||
# Ensure segment_text is a string
|
||||
if normalized_support.get("segment_text") is None:
|
||||
normalized_support["segment_text"] = ""
|
||||
normalized_supports.append(normalized_support)
|
||||
else:
|
||||
normalized_supports.append(support)
|
||||
grounding_metadata["grounding_supports"] = normalized_supports
|
||||
|
||||
normalized["grounding_metadata"] = grounding_metadata
|
||||
|
||||
return normalized
|
||||
|
||||
def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> GroundingMetadata:
|
||||
"""Extract detailed grounding metadata from Gemini result."""
|
||||
grounding_chunks = []
|
||||
|
||||
Reference in New Issue
Block a user