chore: push all remaining changes
- Blog writer enhancements and bug fixes
- Wix integration improvements
- Frontend UI updates
- GSC dashboard docs cleanup
- Image studio assets
- LinkedIn requirements file
- Various dependency updates
This commit is contained in:
@@ -6,7 +6,7 @@ Provider parity:
|
||||
- No direct provider coupling here; Google grounding remains in research only
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
from .source_url_manager import SourceURLManager
|
||||
@@ -22,11 +22,12 @@ class EnhancedContentGenerator:
|
||||
self.transitioner = TransitionGenerator()
|
||||
self.flow = FlowAnalyzer()
|
||||
|
||||
async def generate_section(self, section: Any, research: Any, mode: str = "polished", user_id: str = None) -> Dict[str, Any]:
|
||||
async def generate_section(self, section: Any, research: Any = None, mode: str = "polished", user_id: str = None, competitive_advantage: str = "") -> Dict[str, Any]:
|
||||
prev_summary = self.memory.build_previous_sections_summary(limit=2)
|
||||
urls = self.url_manager.pick_relevant_urls(section, research)
|
||||
prompt = self._build_prompt(section, research, prev_summary, urls)
|
||||
# Provider-agnostic text generation (respect GPT_PROVIDER & circuit-breaker)
|
||||
research_context, section_sources = self._build_research_context(section)
|
||||
urls = self.url_manager.pick_relevant_urls(section, research) if not research_context else []
|
||||
global_research_context = self._build_global_research_context(research, competitive_advantage)
|
||||
prompt = self._build_prompt(section, prev_summary, research_context, urls, global_research_context)
|
||||
content_text: str = ""
|
||||
try:
|
||||
ai_resp = llm_text_gen(
|
||||
@@ -40,29 +41,22 @@ class EnhancedContentGenerator:
|
||||
elif isinstance(ai_resp, str):
|
||||
content_text = ai_resp
|
||||
else:
|
||||
# Fallback best-effort extraction
|
||||
content_text = str(ai_resp or "")
|
||||
except Exception as e:
|
||||
content_text = ""
|
||||
|
||||
result = {
|
||||
"content": content_text,
|
||||
"sources": [{"title": u.get("title", ""), "url": u.get("url", "")} for u in urls] if urls else [],
|
||||
"sources": section_sources,
|
||||
}
|
||||
# Generate transition and compute intelligent flow metrics
|
||||
previous_text = prev_summary
|
||||
current_text = result.get("content", "")
|
||||
transition = self.transitioner.generate_transition(previous_text, getattr(section, 'heading', 'This section'), use_llm=True)
|
||||
metrics = self.flow.assess_flow(previous_text, current_text, use_llm=True)
|
||||
|
||||
# Update memory for subsequent sections and store continuity snapshot
|
||||
if current_text:
|
||||
self.memory.update_with_section(getattr(section, 'id', 'unknown'), current_text, use_llm=True)
|
||||
|
||||
# Return enriched result
|
||||
result["transition"] = transition
|
||||
result["continuity_metrics"] = metrics
|
||||
# Persist a lightweight continuity snapshot for API access
|
||||
try:
|
||||
sid = getattr(section, 'id', 'unknown')
|
||||
if not hasattr(self, "_last_continuity"):
|
||||
@@ -72,22 +66,188 @@ class EnhancedContentGenerator:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _build_prompt(self, section: Any, research: Any, prev_summary: str, urls: list) -> str:
|
||||
def _build_research_context(self, section: Any) -> tuple:
|
||||
"""Build a rich research context block from the section's mapped sources.
|
||||
|
||||
Returns (context_string, sources_list) where context_string is the
|
||||
formatted research context for the prompt, and sources_list contains
|
||||
{title, url} dicts for downstream use.
|
||||
|
||||
When section.references is empty, returns ("", []) — the caller should
|
||||
handle this as a research gap and avoid generating unsupported claims.
|
||||
"""
|
||||
references = getattr(section, 'references', []) or []
|
||||
if not references:
|
||||
return ("", [])
|
||||
|
||||
context_parts = []
|
||||
sources_out = []
|
||||
for i, ref in enumerate(references, 1):
|
||||
if isinstance(ref, dict):
|
||||
title = ref.get('title', '')
|
||||
excerpt = ref.get('excerpt', '')
|
||||
highlights = ref.get('highlights', []) or []
|
||||
summary = ref.get('summary', '')
|
||||
url = ref.get('url', '')
|
||||
content = ref.get('content', '') or ''
|
||||
author = ref.get('author', '') or ''
|
||||
source_type = ref.get('source_type', '') or ''
|
||||
credibility_score = ref.get('credibility_score')
|
||||
published_at = ref.get('published_at', '') or ''
|
||||
else:
|
||||
title = getattr(ref, 'title', '')
|
||||
excerpt = getattr(ref, 'excerpt', '')
|
||||
highlights = getattr(ref, 'highlights', []) or []
|
||||
summary = getattr(ref, 'summary', '')
|
||||
url = getattr(ref, 'url', '')
|
||||
content = getattr(ref, 'content', '') or ''
|
||||
author = getattr(ref, 'author', '') or ''
|
||||
source_type = getattr(ref, 'source_type', '') or ''
|
||||
credibility_score = getattr(ref, 'credibility_score', None)
|
||||
published_at = getattr(ref, 'published_at', '') or ''
|
||||
|
||||
sources_out.append({"title": title, "url": url})
|
||||
|
||||
attribution_parts = []
|
||||
if author:
|
||||
attribution_parts.append(f"by {author}")
|
||||
if source_type:
|
||||
attribution_parts.append(f"[{source_type}]")
|
||||
attribution = " ".join(attribution_parts)
|
||||
credibility_tag = ""
|
||||
if credibility_score is not None:
|
||||
try:
|
||||
score = float(credibility_score)
|
||||
if score >= 0.9:
|
||||
credibility_tag = " (high-credibility)"
|
||||
elif score >= 0.75:
|
||||
credibility_tag = " (credible)"
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
recency_tag = ""
|
||||
if published_at:
|
||||
recency_tag = f" (published {published_at[:10]})" if len(published_at) >= 10 else f" (published {published_at})"
|
||||
|
||||
header = f"Source {i}: {title}"
|
||||
if attribution:
|
||||
header += f" {attribution}"
|
||||
header += f"{credibility_tag}{recency_tag}"
|
||||
part = header + "\n"
|
||||
if summary:
|
||||
part += f" Summary: {summary[:1000]}\n"
|
||||
if excerpt:
|
||||
part += f" Key excerpt: {excerpt[:1000]}\n"
|
||||
if content and not summary and not excerpt:
|
||||
part += f" Content: {content[:800]}\n"
|
||||
if highlights:
|
||||
part += " Key findings:\n"
|
||||
for h in highlights[:3]:
|
||||
h_text = h[:500] if h else ''
|
||||
if h_text:
|
||||
part += f" - {h_text}\n"
|
||||
|
||||
context_parts.append(part)
|
||||
|
||||
return ("\n".join(context_parts), sources_out)
|
||||
|
||||
def _build_global_research_context(self, research: Any, competitive_advantage: str = "") -> str:
|
||||
"""Build global research context from the full BlogResearchResponse object.
|
||||
|
||||
Extracts keyword_analysis, competitor_analysis, search_queries,
|
||||
and competitive_advantage into a compact context block that provides
|
||||
the LLM with strategic direction beyond per-section sources.
|
||||
"""
|
||||
if research is None:
|
||||
return ""
|
||||
parts = []
|
||||
|
||||
ka = getattr(research, 'keyword_analysis', None) or {}
|
||||
if ka:
|
||||
primary = ka.get('primary', [])
|
||||
secondary = ka.get('secondary', [])
|
||||
search_intent = ka.get('search_intent', '')
|
||||
kw_lines = []
|
||||
if primary:
|
||||
kw_lines.append(f"Primary keywords: {', '.join(primary[:10])}")
|
||||
if secondary:
|
||||
kw_lines.append(f"Secondary keywords: {', '.join(secondary[:10])}")
|
||||
if search_intent:
|
||||
kw_lines.append(f"Search intent: {search_intent}")
|
||||
if kw_lines:
|
||||
parts.append("=== KEYWORD & SEARCH STRATEGY ===\n" + "\n".join(kw_lines))
|
||||
|
||||
ca = getattr(research, 'competitor_analysis', None) or {}
|
||||
if ca:
|
||||
ca_lines = []
|
||||
content_gaps = ca.get('content_gaps', [])
|
||||
if content_gaps:
|
||||
ca_lines.append(f"Content gaps (address these): {', '.join(content_gaps[:5])}")
|
||||
industry_leaders = ca.get('industry_leaders', [])
|
||||
if industry_leaders:
|
||||
ca_lines.append(f"Industry leaders: {', '.join(industry_leaders[:5])}")
|
||||
opportunities = ca.get('opportunities', [])
|
||||
if opportunities:
|
||||
ca_lines.append(f"Opportunities: {', '.join(opportunities[:5])}")
|
||||
if ca_lines:
|
||||
parts.append("=== COMPETITIVE LANDSCAPE ===\n" + "\n".join(ca_lines))
|
||||
|
||||
sq = getattr(research, 'search_queries', None) or []
|
||||
if sq:
|
||||
parts.append(f"=== SEARCH INTENT SIGNALS ===\nOriginal search queries: {', '.join(sq[:8])}")
|
||||
|
||||
if competitive_advantage:
|
||||
parts.append(f"=== COMPETITIVE ADVANTAGE ===\nEmphasize this differentiator: {competitive_advantage}")
|
||||
|
||||
return "\n\n".join(parts) if parts else ""
|
||||
|
||||
def _build_prompt(self, section: Any, prev_summary: str, research_context: str, urls: list, global_research_context: str = "") -> str:
|
||||
heading = getattr(section, 'heading', 'Section')
|
||||
key_points = getattr(section, 'key_points', [])
|
||||
keywords = getattr(section, 'keywords', [])
|
||||
subheadings = getattr(section, 'subheadings', []) or []
|
||||
target_words = getattr(section, 'target_words', 300)
|
||||
url_block = "\n".join([f"- {u.get('title','')} ({u.get('url','')})" for u in urls]) if urls else "(no specific URLs provided)"
|
||||
|
||||
return (
|
||||
prompt = (
|
||||
f"You are writing the blog section '{heading}'.\n\n"
|
||||
f"Context summary (previous sections): {prev_summary}\n\n"
|
||||
f"Authoring requirements:\n"
|
||||
f"- Target word count: ~{target_words}\n"
|
||||
f"- Use the following key points: {', '.join(key_points)}\n"
|
||||
f"- Include these keywords naturally: {', '.join(keywords)}\n"
|
||||
f"- Cite insights from these sources when relevant (do not output raw URLs):\n{url_block}\n\n"
|
||||
"Write engaging, well-structured markdown with clear paragraphs (2-4 sentences each) separated by double line breaks."
|
||||
)
|
||||
|
||||
if subheadings:
|
||||
prompt += f"- Cover these subtopics: {', '.join(subheadings)}\n"
|
||||
|
||||
if global_research_context:
|
||||
prompt += f"\n{global_research_context}\n\n"
|
||||
|
||||
if research_context:
|
||||
prompt += (
|
||||
f"\nResearch sources for this section (use these facts, statistics, "
|
||||
f"and insights to support your writing):\n{research_context}\n\n"
|
||||
"IMPORTANT: Base your writing on the research sources above. "
|
||||
"Use specific facts, statistics, and data from these sources. "
|
||||
"Do not invent numbers, statistics, or claims not supported by the research.\n"
|
||||
)
|
||||
elif urls:
|
||||
import logging
|
||||
logging.getLogger('content_generator').warning(
|
||||
f"No research context for section '{heading}' — falling back to bare URLs"
|
||||
)
|
||||
url_lines = []
|
||||
for u in urls:
|
||||
if isinstance(u, dict):
|
||||
url_lines.append(f"- {u.get('title','')} ({u.get('url','')})")
|
||||
else:
|
||||
url_lines.append(f"- {u}")
|
||||
prompt += f"\nReference URLs (consult for additional context):\n" + "\n".join(url_lines) + "\n"
|
||||
|
||||
prompt += (
|
||||
"\nWrite engaging, well-structured markdown with clear paragraphs "
|
||||
"(2-4 sentences each) separated by double line breaks."
|
||||
)
|
||||
|
||||
return prompt
|
||||
|
||||
|
||||
|
||||
@@ -7,10 +7,9 @@ Uses Gemini API for intelligent analysis while minimizing API calls through cach
|
||||
from typing import Dict, Optional
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
# Import the common gemini provider
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
# Provider-agnostic LLM dispatcher (respects GPT_PROVIDER env var)
|
||||
from services.llm_providers.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
class FlowAnalyzer:
|
||||
@@ -21,7 +20,7 @@ class FlowAnalyzer:
|
||||
self._rule_cache: Dict[str, Dict[str, float]] = {}
|
||||
logger.info("✅ FlowAnalyzer initialized with LLM-based analysis")
|
||||
|
||||
def assess_flow(self, previous_text: str, current_text: str, use_llm: bool = True) -> Dict[str, float]:
|
||||
def assess_flow(self, previous_text: str, current_text: str, use_llm: bool = True, user_id: str = None) -> Dict[str, float]:
|
||||
"""
|
||||
Return flow metrics in range 0..1.
|
||||
|
||||
@@ -29,6 +28,7 @@ class FlowAnalyzer:
|
||||
previous_text: Previous section content
|
||||
current_text: Current section content
|
||||
use_llm: Whether to use LLM analysis (default: True for significant content)
|
||||
user_id: Clerk user ID for subscription checking
|
||||
"""
|
||||
if not current_text:
|
||||
return {"flow": 0.0, "consistency": 0.0, "progression": 0.0}
|
||||
@@ -46,7 +46,7 @@ class FlowAnalyzer:
|
||||
|
||||
if should_use_llm:
|
||||
try:
|
||||
metrics = self._llm_flow_analysis(previous_text, current_text)
|
||||
metrics = self._llm_flow_analysis(previous_text, current_text, user_id=user_id)
|
||||
self._cache[cache_key] = metrics
|
||||
logger.info("LLM-based flow analysis completed")
|
||||
return metrics
|
||||
@@ -71,8 +71,8 @@ class FlowAnalyzer:
|
||||
# Use LLM if: substantial content (>100 words) OR has meaningful previous context
|
||||
return word_count > 100 or has_previous
|
||||
|
||||
def _llm_flow_analysis(self, previous_text: str, current_text: str) -> Dict[str, float]:
|
||||
"""Use Gemini API for intelligent flow analysis."""
|
||||
def _llm_flow_analysis(self, previous_text: str, current_text: str, user_id: str = None) -> Dict[str, float]:
|
||||
"""Use LLM for intelligent flow analysis (provider-agnostic)."""
|
||||
|
||||
# Truncate content to minimize tokens while keeping context
|
||||
prev_truncated = (previous_text[-300:] if previous_text else "") if previous_text else ""
|
||||
@@ -103,22 +103,20 @@ Return ONLY a JSON object with these exact keys: flow, consistency, progression
|
||||
}
|
||||
|
||||
try:
|
||||
result = gemini_structured_json_response(
|
||||
result = llm_text_gen(
|
||||
prompt=prompt,
|
||||
schema=schema,
|
||||
temperature=0.2, # Low temperature for consistent scoring
|
||||
max_tokens=1000 # Increased tokens for better analysis
|
||||
json_struct=schema,
|
||||
system_prompt=None,
|
||||
user_id=user_id,
|
||||
temperature=0.2,
|
||||
max_tokens=1000
|
||||
)
|
||||
|
||||
if result.parsed:
|
||||
return {
|
||||
"flow": float(result.parsed.get("flow", 0.6)),
|
||||
"consistency": float(result.parsed.get("consistency", 0.6)),
|
||||
"progression": float(result.parsed.get("progression", 0.6))
|
||||
}
|
||||
else:
|
||||
logger.warning("LLM response parsing failed, using fallback")
|
||||
return self._rule_based_analysis(previous_text, current_text)
|
||||
return {
|
||||
"flow": float(result.get("flow", 0.6)),
|
||||
"consistency": float(result.get("consistency", 0.6)),
|
||||
"progression": float(result.get("progression", 0.6))
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM flow analysis error: {e}")
|
||||
|
||||
@@ -28,18 +28,17 @@ class IntroductionGenerator:
|
||||
) -> str:
|
||||
"""Build a prompt for generating blog introductions."""
|
||||
|
||||
# Extract key research insights
|
||||
keyword_analysis = research.keyword_analysis or {}
|
||||
content_angles = research.suggested_angles or []
|
||||
competitor_analysis = research.competitor_analysis or {}
|
||||
search_queries = research.search_queries or []
|
||||
|
||||
# Get a summary of the first few sections for context
|
||||
section_summaries = []
|
||||
for i, section in enumerate(outline[:3], 1):
|
||||
section_id = section.id
|
||||
content = sections_content.get(section_id, '')
|
||||
if content:
|
||||
# Take first 200 chars as summary
|
||||
summary = content[:200] + '...' if len(content) > 200 else content
|
||||
summary = content[:300] + '...' if len(content) > 300 else content
|
||||
section_summaries.append(f"{i}. {section.heading}: {summary}")
|
||||
|
||||
sections_text = '\n'.join(section_summaries) if section_summaries else "Content sections are being generated."
|
||||
@@ -47,13 +46,56 @@ class IntroductionGenerator:
|
||||
primary_kw_text = ', '.join(primary_keywords) if primary_keywords else "the topic"
|
||||
content_angle_text = ', '.join(content_angles[:3]) if content_angles else "General insights"
|
||||
|
||||
return f"""Generate exactly 3 varied blog introductions for the following blog post.
|
||||
# Build keyword strategy block from actual keyword_analysis
|
||||
keyword_block = ""
|
||||
all_keywords = []
|
||||
if keyword_analysis:
|
||||
primary_kw = keyword_analysis.get('primary', [])
|
||||
secondary_kw = keyword_analysis.get('secondary', [])
|
||||
if primary_kw:
|
||||
all_keywords.extend(primary_kw[:5])
|
||||
if secondary_kw:
|
||||
all_keywords.extend(secondary_kw[:5])
|
||||
si = keyword_analysis.get('search_intent', '')
|
||||
if si:
|
||||
keyword_block += f"\nSearch intent: {si}"
|
||||
if all_keywords:
|
||||
keyword_block = f"Target keywords: {', '.join(all_keywords)}" + keyword_block
|
||||
|
||||
# Build competitive landscape block
|
||||
competitive_block = ""
|
||||
if competitor_analysis:
|
||||
gaps = competitor_analysis.get('content_gaps', [])
|
||||
leaders = competitor_analysis.get('industry_leaders', [])
|
||||
opportunities = competitor_analysis.get('opportunities', [])
|
||||
advantages = competitor_analysis.get('competitive_advantages', [])
|
||||
comp_lines = []
|
||||
if advantages:
|
||||
comp_lines.append(f"Key differentiators: {', '.join(advantages[:3])}")
|
||||
if gaps:
|
||||
comp_lines.append(f"Content gaps to address: {', '.join(gaps[:3])}")
|
||||
if leaders:
|
||||
comp_lines.append(f"Industry leaders: {', '.join(leaders[:3])}")
|
||||
if opportunities:
|
||||
comp_lines.append(f"Opportunities: {', '.join(opportunities[:3])}")
|
||||
if comp_lines:
|
||||
competitive_block = "\n".join(comp_lines)
|
||||
|
||||
# Build search intent context
|
||||
search_block = ""
|
||||
if search_queries:
|
||||
search_block = f"Original search queries: {', '.join(search_queries[:5])}"
|
||||
|
||||
prompt = f"""Generate exactly 3 varied blog introductions for the following blog post.
|
||||
|
||||
BLOG TITLE: {blog_title}
|
||||
|
||||
PRIMARY KEYWORDS: {primary_kw_text}
|
||||
SEARCH INTENT: {search_intent}
|
||||
CONTENT ANGLES: {content_angle_text}
|
||||
{keyword_block}
|
||||
{f"COMPETITIVE LANDSCAPE:\n{competitive_block}" if competitive_block else ""}
|
||||
{f"SEARCH CONTEXT:\n{search_block}" if search_block else ""}
|
||||
|
||||
BLOG CONTENT SUMMARY:
|
||||
{sections_text}
|
||||
@@ -69,6 +111,7 @@ REQUIREMENTS FOR EACH INTRODUCTION:
|
||||
3. Third: Story/statistic-focused (use a compelling fact or narrative hook)
|
||||
- Maintain a professional yet engaging tone
|
||||
- Avoid generic phrases - be specific and benefit-driven
|
||||
- Where possible, incorporate specific insights from the competitive landscape and search intent above
|
||||
|
||||
Return ONLY a JSON array of exactly 3 introductions:
|
||||
[
|
||||
@@ -76,6 +119,7 @@ Return ONLY a JSON array of exactly 3 introductions:
|
||||
"Second introduction (80-120 words, benefit-focused)",
|
||||
"Third introduction (80-120 words, story/statistic-focused)"
|
||||
]"""
|
||||
return prompt
|
||||
|
||||
def get_introduction_schema(self) -> Dict[str, Any]:
|
||||
"""Get the JSON schema for introduction generation."""
|
||||
|
||||
Reference in New Issue
Block a user