chore: bulk commit of local changes across blog writer, SEO dashboard, scheduler, docs-site, and frontend

This commit is contained in:
ajaysi
2026-06-05 12:40:04 +05:30
parent b894bc0abb
commit e54aaa7a3e
74 changed files with 5667 additions and 996 deletions

View File

@@ -40,8 +40,10 @@ class GroundingContextEngine:
}
# Temporal relevance patterns
cy = str(datetime.now().year)
ny = str(datetime.now().year + 1)
self.temporal_patterns = {
'recent': ['2024', '2025', 'latest', 'new', 'recent', 'current', 'updated'],
'recent': [cy, ny, 'latest', 'new', 'recent', 'current', 'updated'],
'trending': ['trend', 'emerging', 'growing', 'increasing', 'rising'],
'evergreen': ['fundamental', 'basic', 'principles', 'foundation', 'core']
}

View File

@@ -137,6 +137,15 @@ class KeywordCurator:
lines.append(f"### Competitive advantage signal (must weave into narrative): {content_gap[0]}")
lines.append(" → This is your primary differentiation hook. Surface it prominently in the unique value section.")
lines.append("")
lines.append("### SUGGESTED SECTION → KEYWORD MAPPING")
lines.append("Map each outline section's keyword focus according to its narrative role:")
lines.append("- Hook / Introduction → lead with primary and trending keywords for timeliness & relevance")
lines.append("- Problem / Pain Point → anchor on secondary and long-tail keywords (informational intent)")
lines.append("- Solution / How-To → weave in primary and secondary keywords for solution-oriented search")
lines.append("- Comparison / Analysis → embed semantic keywords to prevent topical drift into tangents")
lines.append("- Case Studies / Evidence → surface content gap keywords as differentiation proof points")
lines.append("- Future / Trends → leverage trending and content gap keywords for forward-looking authority")
lines.append("")
lines.append("GUIDELINE: Treat these as the primary keyword anchors. You may include closely related")
lines.append("intent-matching variations where natural, but avoid inserting every raw research keyword.")
@@ -176,7 +185,11 @@ class KeywordCurator:
slot_key: Optional[str] = None,
) -> List[str]:
"""
Pick up to N items from a keyword list.
Pick up to N items from a keyword list with diversity sampling.
When the raw list is significantly larger than the limit, selects
evenly-spaced entries to capture semantic diversity rather than
just the first N entries.
Args:
data: The raw keyword_analysis dict.
@@ -184,11 +197,24 @@ class KeywordCurator:
slot_key: The internal slot name for looking up the limit.
Falls back to source_key if not provided.
Returns:
Sliced list of at most N strings.
List of at most N strings with diversity sampling.
"""
limit_key = slot_key or source_key
limit = self.SLOTS.get(limit_key, 5)
raw: Any = data.get(source_key, [])
if not isinstance(raw, list):
return []
return raw[:limit]
if len(raw) <= limit:
return raw
if len(raw) <= limit * 2:
return raw[:limit]
indices = set()
if limit >= 2:
indices.add(0)
indices.add(len(raw) - 1)
step = (len(raw) - 1) / max(limit - 1, 1)
for i in range(1, limit - 1):
indices.add(int(round(i * step)))
else:
indices.add(0)
return [raw[i] for i in sorted(indices) if i < len(raw)][:limit]

View File

@@ -124,7 +124,8 @@ class OutlineGenerator:
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
# Combine AI-generated titles with content angles (full primary keywords for title variety)
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
research_topic = getattr(request, 'topic', '') or ''
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords, research_topic)
logger.info(f"Generated optimized outline with {len(balanced_sections)} sections and {len(title_options)} title options")
@@ -224,7 +225,8 @@ class OutlineGenerator:
content_angle_titles = self.title_generator.extract_content_angle_titles(research)
# Combine AI-generated titles with content angles (full primary keywords for title variety)
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords)
research_topic = getattr(request, 'topic', '') or ''
title_options = self.title_generator.combine_title_options(ai_title_options, content_angle_titles, primary_keywords, research_topic)
await task_manager.update_progress(task_id, "✅ Outline generation and optimization completed successfully!")

View File

@@ -36,12 +36,56 @@ class PromptBuilder:
competitor_text = ', '.join(research.competitor_analysis.get('top_competitors', [])) if research and research.competitor_analysis else "Not available"
opportunity_text = ', '.join(research.competitor_analysis.get('opportunities', [])) if research and research.competitor_analysis else "Not available"
advantages_text = ', '.join(research.competitor_analysis.get('competitive_advantages', [])) if research and research.competitor_analysis else "Not available"
competitor_headings_text = ', '.join(research.competitor_analysis.get('competitor_headings', [])[:3]) if research and research.competitor_analysis and research.competitor_analysis.get('competitor_headings') else ""
# Extract additional UI-mapped context fields
analysis_insights_text = (research.keyword_analysis.get('analysis_insights', '') or '') if research and research.keyword_analysis else ''
market_positioning_text = (research.competitor_analysis.get('market_positioning', '') or '') if research and research.competitor_analysis else ''
difficulty_score = research.keyword_analysis.get('difficulty', None) if research and research.keyword_analysis else None
# Extract top 3 authoritative source excerpts as factual data points
source_excerpts_text = ""
if sources:
sorted_sources = sorted(
[s for s in sources if (s.excerpt or s.summary)],
key=lambda s: s.credibility_score or 0.8, reverse=True
)[:3]
excerpts = []
for i, src in enumerate(sorted_sources, 1):
excerpt = src.excerpt or src.summary or ""
if len(excerpt) > 300:
excerpt = excerpt[:297] + "..."
excerpts.append(f" {i}. \"{src.title}\"{excerpt}")
if excerpts:
source_excerpts_text = "FACTUAL DATA POINTS FROM RESEARCH:\n" + "\n".join(excerpts)
# Extract recency: newest source publication date
newest_date_str = ""
if sources:
valid_dates = [s.published_at for s in sources if s.published_at]
if valid_dates:
try:
parsed = [d for d in valid_dates if d[:4].isdigit()]
if parsed:
sorted_dates = sorted(parsed, reverse=True)
newest_date_str = f"Most Recent Source: {sorted_dates[0]}"
except Exception:
pass
# Extract top grounding evidence snippets as verified data points
grounding_evidence_text = ""
if research and research.grounding_metadata and research.grounding_metadata.grounding_supports:
supports = research.grounding_metadata.grounding_supports
top_supports = [s for s in supports if s.segment_text and len(s.segment_text) > 20][:3]
if top_supports:
evidence_parts = []
for i, s in enumerate(top_supports, 1):
text = s.segment_text[:250]
if len(s.segment_text) > 250:
text += "..."
evidence_parts.append(f" {i}. {text}")
grounding_evidence_text = "VERIFIED EVIDENCE (high-confidence snippets):\n" + "\n".join(evidence_parts)
# Build selected angle prominence section
if selected_content_angle and selected_content_angle.strip():
selected_angle_section = f"""
@@ -106,8 +150,14 @@ Top Competitors: {competitor_text}
Market Opportunities: {opportunity_text}
Competitive Advantages: {advantages_text}
{f"Market Positioning: {market_positioning_text}" if market_positioning_text else ""}
{f"Competitor Headings (AVOID duplicating): {competitor_headings_text}" if competitor_headings_text else ""}
RESEARCH SOURCES: {len(sources)} authoritative sources available
{newest_date_str}
{source_excerpts_text}
{grounding_evidence_text}
{f"CUSTOM INSTRUCTIONS: {custom_instructions}" if custom_instructions else ""}

View File

@@ -54,58 +54,58 @@ class TitleGenerator:
Returns:
Formatted title string
"""
if not angle or len(angle.strip()) < 10: # Too short to be a good title
if not angle or len(angle.strip()) < 10:
return ""
# Clean up the angle
cleaned_angle = angle.strip()
# Capitalize first letter of each sentence and proper nouns
sentences = cleaned_angle.split('. ')
formatted_sentences = []
for sentence in sentences:
if sentence.strip():
# Use title case for better formatting
formatted_sentence = sentence.strip().title()
formatted_sentences.append(formatted_sentence)
formatted_title = '. '.join(formatted_sentences)
# Ensure it ends with proper punctuation
if not formatted_title.endswith(('.', '!', '?')):
formatted_title += '.'
# Use sentence case: capitalize first letter, rest as-is
if cleaned_angle:
cleaned_angle = cleaned_angle[0].upper() + cleaned_angle[1:]
# Limit length to reasonable blog title size
if len(formatted_title) > 200:
formatted_title = formatted_title[:197] + "..."
if len(cleaned_angle) > 120:
cleaned_angle = cleaned_angle[:117] + "..."
return formatted_title
return cleaned_angle
def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str]) -> List[str]:
def combine_title_options(self, ai_titles: List[str], content_angle_titles: List[str], primary_keywords: List[str], research_topic: str = "") -> List[str]:
"""
Combine AI-generated titles with content angle titles, ensuring variety and quality.
AI titles (proper SEO titles generated by LLM) take priority.
Content angle titles (long-format descriptions) are used as fallback.
The research topic is the last resort when nothing else exists.
Args:
ai_titles: AI-generated title options
content_angle_titles: Titles derived from content angles
ai_titles: AI-generated title options (proper blog titles, 50-65 chars)
content_angle_titles: Titles derived from content angles (longer, descriptive)
primary_keywords: Primary keywords for fallback generation
research_topic: Original user research topic as ultimate fallback
Returns:
Combined list of title options (max 6 total)
"""
all_titles = []
# Add content angle titles first (these are research-based and valuable)
for title in content_angle_titles[:3]: # Limit to top 3 content angles
if title and title not in all_titles:
all_titles.append(title)
# Add AI-generated titles
# 1. AI-generated titles first (proper SEO titles from LLM)
for title in ai_titles:
if title and title not in all_titles:
all_titles.append(title)
# Note: Removed fallback titles as requested - only use research and AI-generated titles
# 2. Content angle titles as fallback (research-based, but verbose)
for title in content_angle_titles[:3]:
if title and title not in all_titles:
all_titles.append(title)
# 3. Research topic as last resort when nothing was generated
if not all_titles and research_topic:
all_titles.append(research_topic)
# 4. Primary keyword fallback as absolute last resort
if not all_titles and primary_keywords:
kw = primary_keywords[0]
all_titles.append(kw)
# Limit to 6 titles maximum for UI usability
final_titles = all_titles[:6]
@@ -115,9 +115,10 @@ class TitleGenerator:
def generate_fallback_titles(self, primary_keywords: List[str]) -> List[str]:
"""Generate fallback titles when AI generation fails."""
from datetime import datetime
primary_keyword = primary_keywords[0] if primary_keywords else "Topic"
return [
f"The Complete Guide to {primary_keyword}",
f"{primary_keyword}: Everything You Need to Know",
f"How to Master {primary_keyword} in 2024"
f"How to Master {primary_keyword} in {datetime.now().year}"
]

View File

@@ -432,7 +432,7 @@ class ResearchDataFilter:
'how to', 'guide', 'tutorial', 'steps', 'process', 'method',
'best practices', 'tips', 'strategies', 'techniques', 'approach',
'comparison', 'vs', 'versus', 'difference', 'pros and cons',
'trends', 'future', '2024', '2025', 'emerging', 'new'
'trends', 'future', str(datetime.now().year), str(datetime.now().year + 1), 'emerging', 'new'
]
for indicator in actionable_indicators:

View File

@@ -720,7 +720,7 @@ class ResearchService:
url=src.get("url", ""),
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
credibility_score=float(src.get("credibility_score", 0.8)),
published_at=str(src.get("publication_date", "2024-01-01")),
published_at=str(src.get("publication_date", f"{datetime.now().year}-01-01")),
index=src.get("index"),
source_type=src.get("type", "web")
)

View File

@@ -6,6 +6,7 @@ Different strategies for executing research based on depth and focus.
from abc import ABC, abstractmethod
from typing import Dict, Any
from datetime import datetime
from loguru import logger
from models.blog_models import BlogResearchRequest, ResearchMode, ResearchConfig
@@ -87,7 +88,7 @@ Provide analysis in this EXACT format:
- For each: Quote/claim, source URL, published date, metric/context.
REQUIREMENTS:
- Every claim MUST include a source URL (authoritative, recent: 2024-2025 preferred).
- Every claim MUST include a source URL (authoritative, recent: {datetime.now().year}-{datetime.now().year + 1} preferred).
- Use concrete numbers, dates, outcomes; avoid generic advice.
- Keep bullets tight and scannable for spoken narration."""
return prompt.strip()
@@ -116,7 +117,7 @@ Research Topic: "{topic}"{date_filter}{source_filter}
Provide COMPLETE analysis in this EXACT format:
## WHAT'S CHANGED (2024-2025)
## WHAT'S CHANGED ({datetime.now().year}-{datetime.now().year + 1})
[5-7 concise trend bullets with numbers + source URLs]
## PROOF & NUMBERS
@@ -151,7 +152,7 @@ Primary (3), Secondary (8-10), Long-tail (5-7) with intent hints.
VERIFICATION REQUIREMENTS:
- Minimum 2 authoritative sources per major claim.
- Prefer industry reports > research papers > news > blogs.
- 2024-2025 data strongly preferred.
- {datetime.now().year}-{datetime.now().year + 1} data strongly preferred.
- All numbers must include timeframe and methodology.
- Every bullet must be concise for spoken narration and actionable for {target_audience}."""
return prompt.strip()
@@ -213,7 +214,7 @@ REQUIREMENTS:
- Cite all claims with authoritative source URLs
- Include specific numbers, dates, examples
- Focus on actionable insights for {target_audience}
- Use 2024-2025 data when available"""
- Use {datetime.now().year}-{datetime.now().year + 1} data when available"""
return prompt.strip()