feat: image generation overhaul (model-aware text, dim clamping, \.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates

This commit is contained in:
ajaysi
2026-05-30 07:58:22 +05:30
parent aaf94049da
commit 64f1f88cdd
129 changed files with 8796 additions and 8755 deletions

View File

@@ -0,0 +1,194 @@
"""
Keyword Curator - Smart keyword selection engine for SEO-optimized outline generation.
Instead of dumping all discovered keywords into the LLM prompt (which causes
keyword stuffing and dilutes topical focus), this module selects a highly
curated subset based on SEO best practices and assigns each keyword a
specific structural role in the outline.
"""
from typing import Dict, Any, List, Optional
class KeywordCurator:
    """
    Curates a strict, minimal keyword set for outline generation.

    Selection Rules (SEO Best Practice):
    1. Primary (H1 Focus) → top 2 — brand name + core topic
    2. Secondary (H2 Focus) → top 2 — feature/benefit anchors
    3. Long-tail (H3 Focus) → top 2 — informational intent phrases
    4. Semantic (Body Context) → top 4 — prevent topical drift
    5. Trending (Mention) → top 2 — brief contextual mentions
    6. Content Gap (Edge) → top 1 — competitive differentiator
    """

    # How many keywords to select from each category (keyed by internal slot name)
    SLOTS: Dict[str, int] = {
        "primary": 2,
        "secondary": 2,
        "long_tail": 2,
        "semantic": 4,
        "trending": 2,
        "content_gap": 1,
    }

    def curate(
        self,
        keyword_analysis: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Apply selection rules and return a structured, minimal keyword payload.

        Args:
            keyword_analysis: Raw keyword_analysis dict from research
                (keys: primary, secondary, long_tail,
                semantic_keywords, trending_terms, content_gaps, ...).
                A None or non-dict value is treated as an empty analysis.

        Returns:
            Dict with one list per slot in SLOTS, a flat ``locked_keywords`` list
            (all curated keywords in slot order), a ``stats`` dict
            (total_raw, total_curated, reduction_pct) and the preserved
            non-keyword fields (search_intent, difficulty, analysis_insights)
            when they are present in the input.
        """
        if not isinstance(keyword_analysis, dict):
            # Nothing usable to curate; fall through and produce an empty payload.
            keyword_analysis = {}

        curated: Dict[str, Any] = {}
        locked: List[str] = []
        total_raw = 0

        # Walk SLOTS once so the slot -> source-key mapping lives only in _source_key.
        for slot_key in self.SLOTS:
            raw_key = self._source_key(slot_key)
            picked = self._pick(keyword_analysis, raw_key, slot_key=slot_key)
            curated[slot_key] = picked
            locked.extend(picked)

            raw_list = keyword_analysis.get(raw_key, [])
            if isinstance(raw_list, list):
                total_raw += len(raw_list)

        # --- Flat "locked" list for quick reference ---
        curated["locked_keywords"] = locked

        # --- Track counts for transparency ---
        total_curated = len(locked)
        # With no raw keywords there is nothing to reduce, so report 0% rather than 100%.
        reduction_pct = round((1 - total_curated / total_raw) * 100, 1) if total_raw else 0.0
        curated["stats"] = {
            "total_raw": total_raw,
            "total_curated": total_curated,
            "reduction_pct": reduction_pct,
        }

        # --- Preserve non-keyword analysis fields ---
        for field in ("search_intent", "difficulty", "analysis_insights"):
            if field in keyword_analysis:
                curated[field] = keyword_analysis[field]

        return curated

    def format_for_prompt(self, curated: Dict[str, Any]) -> str:
        """
        Format the curated keyword payload into a strict structural prompt section.

        Args:
            curated: Payload in the shape returned by ``curate``.

        Returns:
            A newline-joined string ready to be injected into the outline prompt.
            Sections whose keyword list is empty are omitted, except H1, which
            emits a "derive from topic context" line instead.
        """
        lines: List[str] = []
        lines.append("## KEYWORD PLACEMENT DIRECTIVES\n")

        # H1 — primary
        primary = curated.get("primary", [])
        if primary:
            h1_text = " | ".join(primary)
            lines.append(f"### H1 (must contain, in order of priority): {h1_text}")
            lines.append(" → Anchor the title and main heading on these terms.")
        else:
            lines.append("### H1: No primary keywords provided — derive from topic context.")

        # H2 — secondary
        secondary = curated.get("secondary", [])
        if secondary:
            lines.append(f"### H2 sections must anchor on (one per major section): {', '.join(secondary)}")
            lines.append(" → Each secondary keyword should map to a distinct H2 section.")

        # H3 — long-tail
        long_tail = curated.get("long_tail", [])
        if long_tail:
            lines.append(f"### H3 / Subsection anchors for informational intent: {', '.join(long_tail)}")
            lines.append(" → Use these as deeper-dive subsections under the relevant H2.")

        # Body-level — semantic
        semantic = curated.get("semantic", [])
        if semantic:
            lines.append(f"### Body-level semantic signals (use naturally, max 1-2 mentions each): {', '.join(semantic)}")
            lines.append(" → These prevent topical drift. Weave into paragraph text, not headings.")

        # Trending — brief
        trending = curated.get("trending", [])
        if trending:
            lines.append(f"### Trending context (mention subtly if relevant): {', '.join(trending)}")
            lines.append(" → Optional. Only include if it strengthens timeliness/narrative.")

        # Content gap — competitive edge (only the first entry is surfaced)
        content_gap = curated.get("content_gap", [])
        if content_gap:
            lines.append(f"### Competitive advantage signal (must weave into narrative): {content_gap[0]}")
            lines.append(" → This is your primary differentiation hook. Surface it prominently in the unique value section.")

        lines.append("")
        lines.append("GUIDELINE: Treat these as the primary keyword anchors. You may include closely related")
        lines.append("intent-matching variations where natural, but avoid inserting every raw research keyword.")
        lines.append("Quality over density — each keyword earns its place by serving a clear structural purpose.")

        stats = curated.get("stats", {})
        if stats:
            lines.append(
                f"\n[From {stats.get('total_raw', '?')} raw research keywords "
                f"→ curated to {stats.get('total_curated', '?')} locked keywords "
                f"({stats.get('reduction_pct', '?')}% reduction)]"
            )

        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _source_key(slot_key: str) -> str:
        """Map internal slot key to the actual field name in keyword_analysis.

        Unknown slot keys are returned unchanged.
        """
        mapping = {
            "primary": "primary",
            "secondary": "secondary",
            "long_tail": "long_tail",
            "semantic": "semantic_keywords",
            "trending": "trending_terms",
            "content_gap": "content_gaps",
        }
        return mapping.get(slot_key, slot_key)

    def _pick(
        self,
        data: Dict[str, Any],
        source_key: str,
        slot_key: Optional[str] = None,
    ) -> List[str]:
        """
        Pick up to N usable keywords from a keyword list, preserving input order.

        Entries that are not strings, are blank, or repeat an earlier entry
        (compared case-insensitively after stripping whitespace) are skipped so
        they neither consume a slot nor break ``str.join`` in ``format_for_prompt``.

        Args:
            data: The raw keyword_analysis dict.
            source_key: The actual key in the dict (e.g. 'semantic_keywords').
            slot_key: The internal slot name for looking up the limit.
                Falls back to source_key if not provided.

        Returns:
            List of at most N whitespace-stripped strings (N comes from SLOTS,
            defaulting to 5 for unknown slots). Empty when the source value is
            missing or is not a list.
        """
        limit = self.SLOTS.get(slot_key or source_key, 5)
        raw: Any = data.get(source_key, []) if isinstance(data, dict) else []
        if not isinstance(raw, list):
            return []

        picked: List[str] = []
        seen = set()
        for item in raw:
            if len(picked) >= limit:
                break
            if not isinstance(item, str):
                continue
            keyword = item.strip()
            marker = keyword.casefold()
            if not keyword or marker in seen:
                continue
            seen.add(marker)
            picked.append(keyword)
        return picked

View File

@@ -1,7 +1,7 @@
"""
Metadata Collector - Handles collection and formatting of outline metadata.
Collects source mapping stats, grounding insights, optimization results, and research coverage.
Collects source mapping stats, grounding insights, and research coverage.
"""
from typing import Dict, Any, List
@@ -54,31 +54,6 @@ class MetadataCollector:
quality_indicators=grounding_insights.get('quality_indicators')
)
def collect_optimization_results(self, optimized_sections, focus):
    """Summarise outline optimization for UI display.

    The quality score is the fraction of sections that have a heading,
    subheadings and key points (all truthy), scaled to 0-10 and rounded to
    one decimal place; it is 0.0 when there are no sections. The list of
    improvements is a fixed set of descriptions, and ``focus`` is passed
    through unchanged as ``optimization_focus``.
    """
    from models.blog_models import OptimizationResults

    section_count = len(optimized_sections)
    if section_count > 0:
        # A section counts as complete only when all three parts are present.
        complete_count = len([
            entry for entry in optimized_sections
            if entry.heading and entry.subheadings and entry.key_points
        ])
        score = complete_count / section_count * 10
    else:
        score = 0.0

    return OptimizationResults(
        overall_quality_score=round(score, 1),
        improvements_made=[
            "Enhanced section headings for better SEO",
            "Optimized keyword distribution across sections",
            "Improved content flow and logical progression",
            "Balanced word count distribution",
            "Enhanced subheadings for better readability",
        ],
        optimization_focus=focus,
    )
def collect_research_coverage(self, research):
"""Collect research coverage metrics for UI display."""
from models.blog_models import ResearchCoverage

View File

@@ -1,7 +1,8 @@
"""
Outline Generator - AI-powered outline generation from research data.
Generates comprehensive, SEO-optimized outlines using research intelligence.
Generates comprehensive, SEO-optimized outlines using research intelligence
and a keyword-curation engine that prevents keyword stuffing.
"""
from typing import Dict, Any, List, Tuple
@@ -23,6 +24,7 @@ from .metadata_collector import MetadataCollector
from .prompt_builder import PromptBuilder
from .response_processor import ResponseProcessor
from .parallel_processor import ParallelProcessor
from .keyword_curator import KeywordCurator
class OutlineGenerator:
@@ -41,6 +43,14 @@ class OutlineGenerator:
self.prompt_builder = PromptBuilder()
self.response_processor = ResponseProcessor()
self.parallel_processor = ParallelProcessor(self.source_mapper, self.grounding_engine)
# Keyword curation engine
self.keyword_curator = KeywordCurator()
def _curate_keywords(self, research) -> Dict[str, Any]:
    """Run keyword curation on the research data's keyword_analysis.

    Falls back to an empty analysis when ``research`` is falsy or its
    ``keyword_analysis`` attribute is missing or None, so the curator always
    receives a dict.

    Returns:
        The payload produced by ``self.keyword_curator.curate``.
    """
    raw_analysis = (getattr(research, 'keyword_analysis', None) if research else None) or {}
    return self.keyword_curator.curate(raw_analysis)
async def generate(self, request: BlogOutlineRequest, user_id: str) -> BlogOutlineResponse:
"""
@@ -59,18 +69,24 @@ class OutlineGenerator:
# Extract research insights
research = request.research
primary_keywords = research.keyword_analysis.get('primary', [])
secondary_keywords = research.keyword_analysis.get('secondary', [])
content_angles = research.suggested_angles
sources = research.sources
search_intent = research.keyword_analysis.get('search_intent', 'informational')
# Curate keywords — reduces 40+ raw keywords to ~13 locked, role-assigned keywords
curated_keywords = self._curate_keywords(research)
# Check for custom instructions
custom_instructions = getattr(request, 'custom_instructions', None)
# Selected (prioritized) content angle and competitive advantage, if any
selected_content_angle = getattr(request, 'selected_content_angle', None)
selected_competitive_advantage = getattr(request, 'selected_competitive_advantage', None)
# Build comprehensive outline generation prompt with rich research data
# Build comprehensive outline generation prompt with curated keyword payload
outline_prompt = self.prompt_builder.build_outline_prompt(
primary_keywords, secondary_keywords, content_angles, sources,
search_intent, request, custom_instructions
curated_keywords, content_angles, sources,
search_intent, request, custom_instructions, selected_content_angle,
selected_competitive_advantage
)
logger.info("Generating AI-powered outline using research results")
@@ -107,7 +123,7 @@ class OutlineGenerator:
ai_title_options = outline_data.get('title_options', [])
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
# Combine AI-generated titles with content angles
# Combine AI-generated titles with content angles (full primary keywords for title variety)
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
@@ -115,7 +131,6 @@ class OutlineGenerator:
# Collect metadata for enhanced UI
source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
research_coverage = self.metadata_collector.collect_research_coverage(research)
return BlogOutlineResponse(
@@ -124,7 +139,6 @@ class OutlineGenerator:
outline=balanced_sections,
source_mapping_stats=source_mapping_stats,
grounding_insights=grounding_insights_data,
optimization_results=optimization_results,
research_coverage=research_coverage
)
@@ -148,20 +162,26 @@ class OutlineGenerator:
# Extract research insights
research = request.research
primary_keywords = research.keyword_analysis.get('primary', [])
secondary_keywords = research.keyword_analysis.get('secondary', [])
content_angles = research.suggested_angles
sources = research.sources
search_intent = research.keyword_analysis.get('search_intent', 'informational')
# Curate keywords — reduces 40+ raw keywords to ~13 locked, role-assigned keywords
curated_keywords = self._curate_keywords(research)
# Check for custom instructions
custom_instructions = getattr(request, 'custom_instructions', None)
# Selected (prioritized) content angle and competitive advantage, if any
selected_content_angle = getattr(request, 'selected_content_angle', None)
selected_competitive_advantage = getattr(request, 'selected_competitive_advantage', None)
await task_manager.update_progress(task_id, "📊 Analyzing research data and building content strategy...")
# Build comprehensive outline generation prompt with rich research data
# Build comprehensive outline generation prompt with curated keyword payload
outline_prompt = self.prompt_builder.build_outline_prompt(
primary_keywords, secondary_keywords, content_angles, sources,
search_intent, request, custom_instructions
curated_keywords, content_angles, sources,
search_intent, request, custom_instructions, selected_content_angle,
selected_competitive_advantage
)
await task_manager.update_progress(task_id, "🤖 Generating AI-powered outline with research insights...")
@@ -203,7 +223,7 @@ class OutlineGenerator:
ai_title_options = outline_data.get('title_options', [])
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
# Combine AI-generated titles with content angles
# Combine AI-generated titles with content angles (full primary keywords for title variety)
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")
@@ -211,7 +231,6 @@ class OutlineGenerator:
# Collect metadata for enhanced UI
source_mapping_stats = self.metadata_collector.collect_source_mapping_stats(mapped_sections, research)
grounding_insights_data = self.metadata_collector.collect_grounding_insights(grounding_insights)
optimization_results = self.metadata_collector.collect_optimization_results(optimized_sections, "comprehensive optimization")
research_coverage = self.metadata_collector.collect_research_coverage(research)
return BlogOutlineResponse(
@@ -220,7 +239,6 @@ class OutlineGenerator:
outline=balanced_sections,
source_mapping_stats=source_mapping_stats,
grounding_insights=grounding_insights_data,
optimization_results=optimization_results,
research_coverage=research_coverage
)
@@ -320,4 +338,3 @@ class OutlineGenerator:
return insights

View File

@@ -1,10 +1,12 @@
"""
Prompt Builder - Handles building of AI prompts for outline generation.
Constructs comprehensive prompts with research data, keywords, and strategic requirements.
Constructs comprehensive prompts using curated keyword payloads,
research data, and strategic requirements.
"""
from typing import Dict, Any, List
from datetime import datetime
class PromptBuilder:
@@ -14,53 +16,105 @@ class PromptBuilder:
"""Initialize the prompt builder."""
pass
def build_outline_prompt(self, primary_keywords: List[str], secondary_keywords: List[str],
def build_outline_prompt(self, curated_keywords: Dict[str, Any],
content_angles: List[str], sources: List, search_intent: str,
request, custom_instructions: str = None) -> str:
"""Build the comprehensive outline generation prompt using filtered research data."""
request, custom_instructions: str = None,
selected_content_angle: str = None,
selected_competitive_advantage: str = None) -> str:
"""Build the comprehensive outline generation prompt using curated keyword payload."""
# Use the filtered research data (already cleaned by ResearchDataFilter)
research = request.research
primary_kw_text = ', '.join(primary_keywords) if primary_keywords else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
secondary_kw_text = ', '.join(secondary_keywords) if secondary_keywords else "None provided"
long_tail_text = ', '.join(research.keyword_analysis.get('long_tail', [])) if research and research.keyword_analysis else "None discovered"
semantic_text = ', '.join(research.keyword_analysis.get('semantic_keywords', [])) if research and research.keyword_analysis else "None discovered"
trending_text = ', '.join(research.keyword_analysis.get('trending_terms', [])) if research and research.keyword_analysis else "None discovered"
content_gap_text = ', '.join(research.keyword_analysis.get('content_gaps', [])) if research and research.keyword_analysis else "None identified"
primary_kw_text = ', '.join(curated_keywords.get('primary', [])) if curated_keywords.get('primary') else (request.topic or ', '.join(getattr(request.research, 'original_keywords', []) or ['the target topic']))
secondary_kw_text = ', '.join(curated_keywords.get('secondary', [])) if curated_keywords.get('secondary') else "None provided"
long_tail_text = ', '.join(curated_keywords.get('long_tail', [])) if curated_keywords.get('long_tail') else "None discovered"
semantic_text = ', '.join(curated_keywords.get('semantic', [])) if curated_keywords.get('semantic') else "None discovered"
trending_text = ', '.join(curated_keywords.get('trending', [])) if curated_keywords.get('trending') else "None discovered"
content_gap_text = ', '.join(curated_keywords.get('content_gap', [])) if curated_keywords.get('content_gap') else "None identified"
content_angle_text = ', '.join(content_angles) if content_angles else "No explicit angles provided; infer compelling angles from research insights."
competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
# Extract additional UI-mapped context fields
analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None
# Build selected angle prominence section
if selected_content_angle and selected_content_angle.strip():
selected_angle_section = f"""
PRIORITY CONTENT ANGLE (MUST PRIORITIZE):
- This outline MUST be built around the following selected content angle as its primary lens and narrative framework:
"{selected_content_angle}"
- Every major section should connect back to this angle
- Title options should reflect this angle
- The overall narrative arc should follow this angle's implied storyline
"""
else:
selected_angle_section = ""
# Build selected competitive advantage prominence section
if selected_competitive_advantage and selected_competitive_advantage.strip():
selected_advantage_section = f"""
PRIORITY COMPETITIVE ADVANTAGE (MUST LEVERAGE):
- This outline MUST prominently feature and leverage the following competitive advantage throughout the content:
"{selected_competitive_advantage}"
- Weave this advantage into key sections as a differentiator
- Frame the solutions and recommendations around this advantage
- Use this advantage to counter competitor weaknesses mentioned in research
"""
else:
selected_advantage_section = ""
# Import and use the KeywordCurator for the directive section
from .keyword_curator import KeywordCurator
keyword_directives = KeywordCurator().format_for_prompt(curated_keywords)
current_date = datetime.now().strftime("%B %d, %Y")
current_year = datetime.now().year
return f"""Create a comprehensive blog outline for: {primary_kw_text}
CONTEXT:
Current Date: {current_date}
Search Intent: {search_intent}
{f"Keyword Difficulty: {difficulty_score}/10" if difficulty_score is not None else ""}
Target: {request.word_count or 1500} words
Industry: {getattr(request.persona, 'industry', 'General') if request.persona else 'General'}
Audience: {getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'}
KEYWORDS:
Primary: {primary_kw_text}
Secondary: {secondary_kw_text}
Long-tail: {long_tail_text}
Semantic: {semantic_text}
Trending: {trending_text}
Content Gaps: {content_gap_text}
OVERVIEW KEYWORD SUMMARY:
- Primary: {primary_kw_text}
- Secondary: {secondary_kw_text}
- Long-tail: {long_tail_text}
- Semantic: {semantic_text}
- Trending: {trending_text}
- Content Gap: {content_gap_text}
{keyword_directives}
RESEARCH INSIGHTS SYNTHESIS:
{analysis_insights_text}
CONTENT ANGLES / STORYLINES: {content_angle_text}
{selected_angle_section}
{selected_advantage_section}
COMPETITIVE INTELLIGENCE:
Top Competitors: {competitor_text}
Market Opportunities: {opportunity_text}
Competitive Advantages: {advantages_text}
{f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}
RESEARCH SOURCES: {len(sources)} authoritative sources available
{f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}
STRATEGIC REQUIREMENTS:
- MUST prioritize and anchor the outline around the selected content angle above all others
- MUST highlight and leverage the selected competitive advantage as a key differentiator
- Follow the KEYWORD PLACEMENT DIRECTIVES — treat the locked keywords as the minimum anchor set; you MAY include closely related intent-matching variations where natural
- Create SEO-optimized headings with natural keyword integration
- Surface the strongest research-backed angles within the outline
- Build logical narrative flow from problem to solution
@@ -78,11 +132,11 @@ Return JSON format:
],
"outline": [
{{
"heading": "Section heading with primary keyword",
"heading": "Section heading",
"subheadings": ["Subheading 1", "Subheading 2", "Subheading 3"],
"key_points": ["Key point 1", "Key point 2", "Key point 3"],
"target_words": 300,
"keywords": ["primary keyword", "secondary keyword"]
"keywords": ["keyword 1", "keyword 2"]
}}
]
}}"""

View File

@@ -76,8 +76,8 @@ class TitleGenerator:
formatted_title += '.'
# Limit length to reasonable blog title size
if len(formatted_title) > 100:
formatted_title = formatted_title[:97] + "..."
if len(formatted_title) > 200:
formatted_title = formatted_title[:197] + "..."
return formatted_title

View File

@@ -155,7 +155,7 @@ class ResearchService:
sources = raw_result.get('sources', [])
search_widget = "" # Exa doesn't provide search widgets
search_queries = raw_result.get('search_queries', [])
grounding_metadata = None # Exa doesn't provide grounding metadata
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
except RuntimeError as e:
# Fail fast - no fallback for testing/debugging
@@ -239,7 +239,7 @@ class ResearchService:
sources = raw_result.get('sources', [])
search_widget = "" # Tavily doesn't provide search widgets
search_queries = raw_result.get('search_queries', [])
grounding_metadata = None # Tavily doesn't provide grounding metadata
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
except RuntimeError as e:
# Fail fast - no fallback for testing/debugging
@@ -482,7 +482,7 @@ class ResearchService:
sources = raw_result.get('sources', []) or []
search_widget = "" # Exa doesn't provide search widgets
search_queries = raw_result.get('search_queries', []) or []
grounding_metadata = None # Exa doesn't provide grounding metadata
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
except RuntimeError as e:
# Fail fast - no fallback for testing/debugging
@@ -568,7 +568,7 @@ class ResearchService:
sources = raw_result.get('sources', []) or []
search_widget = "" # Tavily doesn't provide search widgets
search_queries = raw_result.get('search_queries', []) or []
grounding_metadata = None # Tavily doesn't provide grounding metadata
grounding_metadata = self._build_grounding_metadata_from_sources(sources, search_queries)
except RuntimeError as e:
# Fail fast - no fallback for testing/debugging
@@ -728,6 +728,58 @@ class ResearchService:
return sources
def _build_grounding_metadata_from_sources(self, sources: List[Dict[str, Any]], search_queries: List[str]) -> Optional[GroundingMetadata]:
    """Build GroundingMetadata from Exa/Tavily sources (which lack native Google grounding).

    For each source this emits one GroundingChunk, one GroundingSupport per
    highlight (or a single support from the excerpt when there are no
    highlights), and one inline Citation whose text is the first highlight,
    else the excerpt, else the title, truncated to 200 characters.

    Args:
        sources: Source dicts; the keys read are title, url, credibility_score,
            highlights and excerpt. Missing or None values fall back to defaults.
        search_queries: Queries that produced the sources; None becomes [].

    Returns:
        A GroundingMetadata instance, or None when ``sources`` is empty.
    """
    if not sources:
        return None

    grounding_chunks = []
    grounding_supports = []
    citations = []

    for i, source in enumerate(sources):
        # `.get(key, default)` does not apply the default when the key is present
        # with a None value, so normalise explicitly to keep None out of the scores.
        score = source.get('credibility_score')
        if score is None:
            score = 0.85
        title = source.get('title') or ''
        excerpt = source.get('excerpt') or ''
        highlights = [h for h in (source.get('highlights') or []) if h]

        grounding_chunks.append(GroundingChunk(
            title=title or 'Untitled',
            url=source.get('url') or '',
            confidence_score=score,
        ))

        # Prefer highlights as supporting segments; fall back to the excerpt.
        segments = highlights if highlights else ([excerpt] if excerpt else [])
        for segment in segments:
            grounding_supports.append(GroundingSupport(
                confidence_scores=[score],
                grounding_chunk_indices=[i],
                segment_text=segment,
            ))

        # An empty excerpt falls through to the title instead of yielding blank text.
        citation_text = highlights[0] if highlights else (excerpt or title)
        citations.append(Citation(
            citation_type='inline',
            start_index=0,
            end_index=0,
            text=citation_text[:200],
            source_indices=[i],
            reference=f'Source {i + 1}',
        ))

    return GroundingMetadata(
        grounding_chunks=grounding_chunks,
        grounding_supports=grounding_supports,
        citations=citations,
        web_search_queries=search_queries or [],
    )
def _normalize_cached_research_data(self, cached_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Normalize cached research data to fix None values in confidence_scores.