Added blog writer implementation - WIP
This commit is contained in:
649
backend/services/blog_writer/blog_service.py
Normal file
649
backend/services/blog_writer/blog_service.py
Normal file
@@ -0,0 +1,649 @@
|
||||
from typing import Any, Dict, List
|
||||
from loguru import logger
|
||||
from services.llm_providers.gemini_provider import gemini_structured_json_response
|
||||
|
||||
from models.blog_models import (
|
||||
BlogResearchRequest,
|
||||
BlogResearchResponse,
|
||||
BlogOutlineRequest,
|
||||
BlogOutlineResponse,
|
||||
BlogOutlineRefineRequest,
|
||||
BlogSectionRequest,
|
||||
BlogSectionResponse,
|
||||
BlogOptimizeRequest,
|
||||
BlogOptimizeResponse,
|
||||
BlogSEOAnalyzeRequest,
|
||||
BlogSEOAnalyzeResponse,
|
||||
BlogSEOMetadataRequest,
|
||||
BlogSEOMetadataResponse,
|
||||
BlogPublishRequest,
|
||||
BlogPublishResponse,
|
||||
ResearchSource,
|
||||
BlogOutlineSection,
|
||||
)
|
||||
|
||||
|
||||
class BlogWriterService:
|
||||
"""Service layer for AI Blog Writer (stub implementations for scaffolding)."""
|
||||
|
||||
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
    """
    Stage 1: Research & Strategy (AI Orchestration)

    Makes one grounded Gemini call (``generate_grounded_content``) and derives
    every part of the response from that single result.

    Args:
        request: Research request. ``topic`` is used when set, otherwise the
            keywords are joined into a topic; ``industry`` and ``persona``
            refine the prompt.

    Returns:
        A ``BlogResearchResponse`` with ``success=True`` carrying the extracted
        sources, keyword/competitor analysis, suggested angles, and the search
        widget / search queries reported by the provider.

    Raises:
        Nothing is caught here; any error from the provider call propagates.
    """
    from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider

    gemini = GeminiGroundedProvider()

    # Normalise once so a missing keyword list cannot break the topic fallback
    # or the keyword parser further down.
    keywords = request.keywords or []
    topic = request.topic or ", ".join(keywords)
    industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
    target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'

    # Single comprehensive research prompt - Gemini handles Google Search automatically
    research_prompt = f"""
    Research the topic "{topic}" in the {industry} industry for {target_audience} audience. Provide a comprehensive analysis including:

    1. Current trends and insights (2024-2025)
    2. Key statistics and data points with sources
    3. Industry expert opinions and quotes
    4. Recent developments and news
    5. Market analysis and forecasts
    6. Best practices and case studies
    7. Keyword analysis: primary, secondary, and long-tail opportunities
    8. Competitor analysis: top players and content gaps
    9. Content angle suggestions: 5 compelling angles for blog posts

    Focus on factual, up-to-date information from credible sources.
    Include specific data points, percentages, and recent developments.
    Structure your response with clear sections for each analysis area.
    """

    # Single Gemini call with native Google Search grounding - no fallbacks
    gemini_result = await gemini.generate_grounded_content(
        prompt=research_prompt,
        content_type="research",
        max_tokens=2000
    )

    # Extract sources from grounding metadata
    sources = self._extract_sources_from_grounding(gemini_result)

    # Extract search widget and queries for UI display
    search_widget = gemini_result.get("search_widget", "") or ""
    search_queries = gemini_result.get("search_queries", []) or []

    # Parse the comprehensive response for different analysis components.
    # `or ""` keeps the text parsers working when the provider returns None.
    content = gemini_result.get("content", "") or ""
    keyword_analysis = self._parse_keyword_analysis(content, keywords)
    competitor_analysis = self._parse_competitor_analysis(content)
    suggested_angles = self._parse_content_angles(content, topic, industry)

    logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")

    return BlogResearchResponse(
        success=True,
        sources=sources,
        keyword_analysis=keyword_analysis,
        competitor_analysis=competitor_analysis,
        suggested_angles=suggested_angles,
        # Search widget and queries for UI display (always bound above, so no
        # `locals()` probing is needed).
        search_widget=search_widget,
        search_queries=search_queries,
    )
|
||||
|
||||
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> List[ResearchSource]:
    """Turn the ``sources`` entries of a Gemini result into ``ResearchSource`` objects.

    Each entry is read with ``.get``; missing fields fall back to the defaults
    spelled out below. An entry's ``content`` is truncated to 500 characters
    for the excerpt; without content the excerpt is a short placeholder built
    from the title.
    """

    def to_research_source(entry):
        body = entry.get("content")
        if body:
            excerpt = body[:500]
        else:
            excerpt = f"Source from {entry.get('title', 'web')}"
        return ResearchSource(
            title=entry.get("title", "Untitled"),
            url=entry.get("url", ""),
            excerpt=excerpt,
            credibility_score=float(entry.get("credibility_score", 0.8)),
            published_at=str(entry.get("publication_date", "2024-01-01"))
        )

    # The Gemini grounded provider already extracts sources and puts them in the 'sources' field
    return [to_research_source(entry) for entry in gemini_result.get("sources", [])]
|
||||
|
||||
def _parse_keyword_analysis(self, content: str, original_keywords: List[str]) -> Dict[str, Any]:
    """Parse keyword analysis from the research content.

    The free-text ``analysis_content`` is taken from the lines that follow a
    heading line mentioning "keyword" together with "analysis" or "primary",
    up to the next line that starts with ``1.``-``9.``. The structured fields
    are derived from ``original_keywords`` only, not from ``content``.

    Args:
        content: Research text; ``None`` is treated as an empty string.
        original_keywords: Keywords supplied with the request; ``None`` is
            treated as an empty list.

    Returns:
        Dict with ``primary``, ``secondary``, ``long_tail``, ``search_intent``,
        ``difficulty``, ``content_gaps`` and ``analysis_content``. When no
        keyword section is found, ``analysis_content`` is the first 200
        characters of ``content``.
    """
    # Normalise up front: previously a None keyword list crashed on len()
    # and None content crashed on .split().
    content = content or ""
    keywords = original_keywords or []

    numbered_prefixes = ('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')
    keyword_section = []
    in_keyword_section = False

    for line in content.split('\n'):
        lowered = line.lower()
        if 'keyword' in lowered and ('analysis' in lowered or 'primary' in lowered):
            # Heading lines open the section and are never collected themselves.
            in_keyword_section = True
            continue
        if in_keyword_section and line.strip():
            # A numbered line marks the start of the next analysis area.
            if line.startswith(numbered_prefixes):
                break
            keyword_section.append(line.strip())

    return {
        "primary": keywords[:1],
        "secondary": keywords[1:],
        "long_tail": [f"{kw} guide" for kw in keywords[:2]],
        "search_intent": "informational",
        "difficulty": 6,
        "content_gaps": [f"{kw} best practices" for kw in keywords[:2]],
        "analysis_content": "\n".join(keyword_section) if keyword_section else content[:200]
    }
|
||||
|
||||
def _parse_competitor_analysis(self, content: str) -> Dict[str, Any]:
    """Pull the competitor section out of the research text.

    Collection starts after a line mentioning "competitor" together with
    "analysis" or "top", skips blank lines, and stops at the first line that
    begins with ``1.``-``9.``. The structured lists are always returned
    empty; only ``analysis_notes`` carries the extracted text, or a fixed
    placeholder when nothing was collected.
    """
    numbered_prefixes = ('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')
    notes: List[str] = []
    collecting = False

    for raw in content.split('\n'):
        lowered = raw.lower()
        is_heading = 'competitor' in lowered and ('analysis' in lowered or 'top' in lowered)
        if is_heading:
            # Heading lines switch collection on and are not kept.
            collecting = True
        elif collecting and raw.strip():
            if raw.startswith(numbered_prefixes):
                break
            notes.append(raw.strip())

    if notes:
        analysis_notes = "\n".join(notes)
    else:
        analysis_notes = "Competitor analysis from research"

    return {
        "top_competitors": [],
        "content_gaps": [],
        "opportunities": [],
        "analysis_notes": analysis_notes
    }
|
||||
|
||||
def _parse_content_angles(self, content: str, topic: str, industry: str) -> List[str]:
    """Extract up to five content angles from the research text.

    Collection starts after a line mentioning "angle" together with "suggest"
    or "content" and stops at the first line beginning with ``1.``-``9.``.
    Blank lines and lines starting with a bullet marker (``•``, ``-``, ``*``)
    are skipped. When nothing is collected, five generic angles built from
    ``topic`` and ``industry`` are returned instead.
    """
    numbered_prefixes = ('1.', '2.', '3.', '4.', '5.', '6.', '7.', '8.', '9.')
    bullet_prefixes = ('•', '-', '*')
    collected: List[str] = []
    collecting = False

    for raw in content.split('\n'):
        lowered = raw.lower()
        if 'angle' in lowered and ('suggest' in lowered or 'content' in lowered):
            collecting = True
            continue
        if not collecting or not raw.strip():
            continue
        if raw.startswith(numbered_prefixes):
            break
        if not raw.startswith(bullet_prefixes):
            collected.append(raw.strip())

    if collected:
        return collected[:5]  # Return top 5 angles

    # If no angles found in content, use fallback
    return [
        f"How {topic} is Transforming {industry}",
        f"Latest {topic} Trends: What You Need to Know",
        f"{topic} Best Practices for {industry}",
        f"Case Study: {topic} Success Stories",
        f"The Future of {topic} in {industry}"
    ]
|
||||
|
||||
|
||||
async def generate_outline(self, request: BlogOutlineRequest) -> BlogOutlineResponse:
    """
    Stage 2: Content Planning with AI-generated outline using research results

    Builds a strategy prompt from the research attached to ``request``, asks
    Gemini for a structured JSON outline, and converts the returned sections
    into ``BlogOutlineSection`` objects.

    Args:
        request: Outline request; reads ``research``, ``word_count`` and
            ``persona``.

    Returns:
        ``BlogOutlineResponse`` with ``success=True``, the model's title
        options and the parsed outline sections.

    Raises:
        ValueError: if the model response carries an ``error`` key, lacks a
            list-valued ``outline``, or contains no title options.
    """
    # Extract research insights
    research = request.research
    primary_keywords = research.keyword_analysis.get('primary', [])
    secondary_keywords = research.keyword_analysis.get('secondary', [])
    content_angles = research.suggested_angles
    sources = research.sources
    search_intent = research.keyword_analysis.get('search_intent', 'informational')

    # Values interpolated more than once (or awkward to inline) are computed first.
    target_word_count = request.word_count or 1500
    industry_context = getattr(request.persona, 'industry', 'General') if request.persona else 'General'
    audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
    long_tail_keywords = research.keyword_analysis.get('long_tail', [])[:5]
    angles_block = "\n".join(f"• {angle}" for angle in content_angles[:6])

    # Build sophisticated outline generation prompt with advanced content strategy
    outline_prompt = f"""
    You are a world-class content strategist and SEO expert with 15+ years of experience creating viral, high-converting blog content. Your outlines have generated millions of views and driven significant business results.

    CONTENT STRATEGY BRIEF:
    Topic: {', '.join(primary_keywords)}
    Search Intent: {search_intent}
    Target Word Count: {target_word_count} words
    Industry Context: {industry_context}
    Audience: {audience}

    RESEARCH INTELLIGENCE:
    Primary Keywords: {', '.join(primary_keywords)}
    Secondary Keywords: {', '.join(secondary_keywords)}
    Long-tail Opportunities: {', '.join(long_tail_keywords)}

    Content Angles Discovered:
    {angles_block}

    Research Sources Available: {len(sources)} authoritative sources with current data

    STRATEGIC OUTLINE REQUIREMENTS:

    1. CONTENT ARCHITECTURE:
    - Create 5-7 sections that follow a logical progression
    - Each section must have a clear purpose and value proposition
    - Build a narrative arc that keeps readers engaged throughout
    - Include strategic content gaps that competitors miss

    2. SEO OPTIMIZATION:
    - Naturally integrate primary keywords in H2 headings (not forced)
    - Use secondary keywords in subheadings and key points
    - Include long-tail keywords in natural language
    - Optimize for featured snippets and voice search
    - Create semantic keyword clusters

    3. READER ENGAGEMENT:
    - Start with a compelling hook that addresses pain points
    - Use storytelling elements and real-world examples
    - Include actionable insights readers can implement immediately
    - Create sections that encourage social sharing
    - End with a strong call-to-action

    4. CONTENT DEPTH:
    - Each section: 2-4 specific, actionable subheadings
    - Each section: 4-6 key points with research-backed insights
    - Include data points, statistics, and case studies where relevant
    - Address common objections and questions
    - Provide unique angles not covered by competitors

    5. WORD COUNT DISTRIBUTION:
    - Introduction: 10-15% of total words
    - Main sections: 70-80% of total words (distributed strategically)
    - Conclusion: 10-15% of total words
    - Total target: {target_word_count} words

    6. COMPETITIVE ADVANTAGE:
    - Include fresh perspectives from recent research
    - Address emerging trends and future implications
    - Provide deeper insights than surface-level content
    - Include practical tools, frameworks, or templates
    - Reference authoritative sources and data

    TITLE STRATEGY:
    Create 3 distinct title options that:
    - Include primary keywords naturally
    - Promise clear value to readers
    - Create curiosity and urgency
    - Are optimized for click-through rates
    - Work well for social media sharing

    CRITICAL: Respond ONLY with valid JSON. No additional text or explanations.

    JSON FORMAT:
    {{
        "title_options": [
            "Compelling title with primary keyword and benefit",
            "Question-based title that creates curiosity",
            "How-to title with specific outcome promise"
        ],
        "outline": [
            {{
                "heading": "Strategic section title with primary keyword",
                "subheadings": [
                    "Specific, actionable subheading 1",
                    "Data-driven subheading 2",
                    "Case study or example subheading 3"
                ],
                "key_points": [
                    "Research-backed insight with specific data",
                    "Actionable step readers can take immediately",
                    "Common mistake to avoid with explanation",
                    "Advanced tip that provides competitive advantage",
                    "Real-world example or case study"
                ],
                "target_words": 300,
                "keywords": ["primary keyword", "secondary keyword", "long-tail phrase"]
            }}
        ]
    }}
    """

    logger.info("Generating AI-powered outline using research results")

    # Define the schema for structured JSON response
    outline_schema = {
        "type": "object",
        "properties": {
            "title_options": {
                "type": "array",
                "items": {"type": "string"},
                "description": "3 SEO-optimized title options"
            },
            "outline": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {"type": "string"},
                        "heading": {"type": "string"},
                        "subheadings": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "key_points": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "word_count": {"type": "integer"},
                        "keywords": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Keywords to focus on in this section"
                        }
                    },
                    "required": ["id", "heading", "subheadings", "key_points", "word_count", "keywords"]
                }
            }
        },
        "required": ["title_options", "outline"]
    }

    # Generate outline using structured JSON response (no grounding needed)
    # NOTE(review): this helper is called without `await`, so it presumably
    # runs synchronously inside this coroutine - confirm whether it should be
    # offloaded to a worker thread.
    outline_data = gemini_structured_json_response(
        prompt=outline_prompt,
        schema=outline_schema,
        temperature=0.3,
        max_tokens=3000
    )

    # Check for errors in the response
    if isinstance(outline_data, dict) and 'error' in outline_data:
        logger.error(f"Gemini structured response error: {outline_data['error']}")
        raise ValueError(f"AI outline generation failed: {outline_data['error']}")

    # Validate required fields
    if not isinstance(outline_data, dict) or 'outline' not in outline_data or not isinstance(outline_data['outline'], list):
        logger.error(f"Invalid outline structure: {outline_data}")
        raise ValueError("Invalid outline structure in Gemini response")

    # Convert to BlogOutlineSection objects
    outline_sections = []
    for i, section_data in enumerate(outline_data.get('outline', [])):
        if not isinstance(section_data, dict) or 'heading' not in section_data:
            logger.warning(f"Skipping invalid section data at index {i}")
            continue

        # The prompt example names the field "target_words" while the schema
        # requires "word_count"; accept either so the model's per-section
        # budget is not silently replaced by the 300-word default.
        target_words = section_data.get('target_words', section_data.get('word_count', 300))

        section = BlogOutlineSection(
            id=f"s{i+1}",
            heading=section_data.get('heading', f'Section {i+1}'),
            subheadings=section_data.get('subheadings', []),
            key_points=section_data.get('key_points', []),
            references=sources[:2] if i < 2 else [],  # Assign sources to first 2 sections
            target_words=target_words,
            keywords=section_data.get('keywords', [])
        )
        outline_sections.append(section)

    title_options = outline_data.get('title_options', [])
    if not title_options:
        raise ValueError("No title options provided in Gemini response")

    logger.info(f"Generated outline with {len(outline_sections)} sections and {len(title_options)} title options")

    return BlogOutlineResponse(
        success=True,
        title_options=title_options,
        outline=outline_sections
    )
|
||||
|
||||
|
||||
async def refine_outline(self, request: BlogOutlineRefineRequest) -> BlogOutlineResponse:
    """
    Refine outline with HITL (Human-in-the-Loop) operations

    Supports add, remove, rename, move, merge and update operations. All
    operations except ``add`` need ``request.section_id``. After the
    operation, section IDs are renumbered ``s1..sN`` in outline order.

    Args:
        request: Carries ``outline`` (list of sections), ``operation``,
            ``section_id`` and an optional ``payload`` dict with the
            operation's arguments (e.g. ``heading``, ``direction``).

    Returns:
        ``BlogOutlineResponse`` with the refined outline and ``success=True``.
        If anything raises, the error is logged and the response carries
        ``success=False`` together with the request's original outline.
    """
    # Local import: only this method needs it.
    import copy

    # Work on a deep copy. A shallow copy shares the section objects, so
    # rename/merge/update and the ID renumbering would mutate the caller's
    # outline, and the failure path would return a half-edited outline.
    outline = copy.deepcopy(request.outline)
    operation = request.operation.lower()
    section_id = request.section_id
    payload = request.payload or {}

    try:
        if operation == 'add':
            # Add new section
            new_section = BlogOutlineSection(
                id=f"s{len(outline) + 1}",
                heading=payload.get('heading', 'New Section'),
                subheadings=payload.get('subheadings', []),
                key_points=payload.get('key_points', []),
                references=[],
                target_words=payload.get('target_words', 300)
            )
            outline.append(new_section)
            logger.info(f"Added new section: {new_section.heading}")

        elif operation == 'remove' and section_id:
            # Remove section
            outline = [s for s in outline if s.id != section_id]
            logger.info(f"Removed section: {section_id}")

        elif operation == 'rename' and section_id:
            # Rename section
            for section in outline:
                if section.id == section_id:
                    section.heading = payload.get('heading', section.heading)
                    break
            logger.info(f"Renamed section {section_id} to: {payload.get('heading')}")

        elif operation == 'move' and section_id:
            # Move section (reorder)
            direction = payload.get('direction', 'down')  # 'up' or 'down'
            current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)

            if current_index != -1:
                if direction == 'up' and current_index > 0:
                    outline[current_index], outline[current_index - 1] = outline[current_index - 1], outline[current_index]
                elif direction == 'down' and current_index < len(outline) - 1:
                    outline[current_index], outline[current_index + 1] = outline[current_index + 1], outline[current_index]
                logger.info(f"Moved section {section_id} {direction}")

        elif operation == 'merge' and section_id:
            # Merge with next section
            current_index = next((i for i, s in enumerate(outline) if s.id == section_id), -1)
            if current_index != -1 and current_index < len(outline) - 1:
                current_section = outline[current_index]
                next_section = outline[current_index + 1]

                # Merge sections
                current_section.heading = f"{current_section.heading} & {next_section.heading}"
                current_section.subheadings.extend(next_section.subheadings)
                current_section.key_points.extend(next_section.key_points)
                current_section.references.extend(next_section.references)
                current_section.target_words = (current_section.target_words or 0) + (next_section.target_words or 0)

                # Remove the next section
                outline.pop(current_index + 1)
                logger.info(f"Merged section {section_id} with next section")

        elif operation == 'update' and section_id:
            # Update section details
            for section in outline:
                if section.id == section_id:
                    if 'heading' in payload:
                        section.heading = payload['heading']
                    if 'subheadings' in payload:
                        section.subheadings = payload['subheadings']
                    if 'key_points' in payload:
                        section.key_points = payload['key_points']
                    if 'target_words' in payload:
                        section.target_words = payload['target_words']
                    break
            logger.info(f"Updated section {section_id}")

        else:
            # Unknown operation, or one that needs a section_id but got none:
            # the outline is returned unchanged, but no longer silently.
            logger.warning(f"Outline refinement skipped: unsupported operation '{operation}' or missing section_id")

        # Reassign IDs to maintain order
        for i, section in enumerate(outline):
            section.id = f"s{i+1}"

        return BlogOutlineResponse(
            success=True,
            title_options=["Refined Outline"],
            outline=outline
        )

    except Exception as e:
        logger.error(f"Outline refinement failed: {e}")
        return BlogOutlineResponse(
            success=False,
            title_options=["Error"],
            outline=request.outline
        )
|
||||
|
||||
async def generate_section(self, request: BlogSectionRequest) -> BlogSectionResponse:
    """Return placeholder markdown for the requested section.

    The markdown is the section heading as an H2 followed by a fixed
    placeholder sentence; the section's references are passed through as
    citations.
    """
    # TODO: Generate section markdown incorporating references and persona/tone
    section = request.section
    placeholder_md = f"## {section.heading}\n\nThis section content will be generated here.\n"
    return BlogSectionResponse(
        success=True,
        markdown=placeholder_md,
        citations=section.references,
    )
|
||||
|
||||
async def optimize_section(self, request: BlogOptimizeRequest) -> BlogOptimizeResponse:
    """Return the submitted content unchanged, with no diff preview (stub)."""
    # TODO: Run readability/EEAT optimization and return diff
    unchanged_content = request.content
    return BlogOptimizeResponse(
        success=True,
        optimized=unchanged_content,
        diff_preview=None,
    )
|
||||
|
||||
async def hallucination_check(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Run hallucination detection on provided text using existing detector service.

    Args:
        payload: Dict whose ``text`` entry holds the text to check. The value
            is stringified and stripped; a missing, ``None`` or blank value
            counts as no text.

    Returns:
        On success, a dict with ``success=True``, the detector's aggregate
        counters, and a ``claims`` list of plain dicts. With no text, or when
        the detector import or call raises, ``{"success": False, "error": ...}``.
    """
    text = str(payload.get("text", "") or "").strip()
    if not text:
        return {"success": False, "error": "No text provided"}

    # Prefer direct service use over HTTP proxy
    try:
        from services.hallucination_detector import HallucinationDetector

        detector = HallucinationDetector()
        result = await detector.detect_hallucinations(text)

        # Serialize dataclass-like result to dict
        claims = [
            {
                "text": c.text,
                "confidence": c.confidence,
                "assessment": c.assessment,
                "supporting_sources": c.supporting_sources,
                "refuting_sources": c.refuting_sources,
                "reasoning": c.reasoning,
            }
            for c in result.claims
        ]

        return {
            "success": True,
            "overall_confidence": result.overall_confidence,
            "total_claims": result.total_claims,
            "supported_claims": result.supported_claims,
            "refuted_claims": result.refuted_claims,
            "insufficient_claims": result.insufficient_claims,
            "timestamp": result.timestamp,
            "claims": claims,
        }
    except Exception as e:
        # Still best-effort for the caller, but the failure is now logged
        # instead of disappearing into the response payload.
        logger.error(f"Hallucination check failed: {e}")
        return {"success": False, "error": str(e)}
|
||||
|
||||
async def seo_analyze(self, request: BlogSEOAnalyzeRequest) -> BlogSEOAnalyzeResponse:
    """Wrap existing SEO tools to produce unified analysis for blog content.

    Combines the on-page SEO service result with a lightweight markdown scan
    of ``request.content`` (heading counts, links, keyword density) and
    best-effort topic hints from the content strategy service.

    Returns:
        ``BlogSEOAnalyzeResponse`` with ``success=True``; ``seo_score`` falls
        back to 75 when the on-page result has no ``overall_score``.
    """
    import re

    from services.seo_tools.on_page_seo_service import OnPageSEOService
    from services.seo_tools.image_alt_service import ImageAltService
    from services.seo_tools.content_strategy_service import ContentStrategyService

    content = request.content or ""
    target_keywords = request.keywords or []

    # On-page analysis (treat content as a virtual URL/document for now)
    on_page_result = await OnPageSEOService().analyze_on_page_seo(
        url="about:blank",
        target_keywords=target_keywords,
    )

    # Image alt coverage (placeholder: no images in raw content yet).
    # The service is constructed but not consulted; a constructor failure is
    # tolerated and the zeroed status is reported either way.
    image_alt_status = {"total_images": 0, "missing_alt": 0}
    try:
        ImageAltService()
    except Exception:
        pass

    # Strategy hints (keywords/topics)
    try:
        strategy_hints = await ContentStrategyService().analyze_content_topics(content=content)
    except Exception:
        strategy_hints = {"topics": [], "gaps": []}

    # Lightweight markdown parsing for headings/links/keywords
    word_tokens = re.findall(r"[A-Za-z0-9']+", content)
    total_words = max(len(word_tokens), 1)  # never divide by zero below
    heading_lines = content.splitlines()

    def count_prefixed(prefix: str) -> int:
        return sum(1 for ln in heading_lines if ln.startswith(prefix))

    h1 = count_prefixed('# ')
    h2 = count_prefixed('## ')
    h3 = count_prefixed('### ')
    md_links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", content)
    external_links = [target for (_label, target) in md_links if target.startswith('http')]

    # Keyword density
    density_map: Dict[str, Any] = {"target_keywords": target_keywords}
    for kw in target_keywords:
        hits = len(re.findall(re.escape(kw), content, flags=re.IGNORECASE))
        density_map[kw] = {
            "occurrences": hits,
            "density": round(hits / total_words, 4)
        }

    # Build unified response
    recommendations: List[str] = []
    on_page_recs = on_page_result.get("recommendations")
    if isinstance(on_page_recs, list):
        recommendations.extend(on_page_result["recommendations"])
    if strategy_hints.get("gaps"):
        recommendations.append("Cover missing topics: " + ", ".join(strategy_hints["gaps"]))
    if not external_links:
        recommendations.append("Add at least one credible external link to authoritative sources.")
    if h2 < 2:
        recommendations.append("Increase number of H2 sections for better structure.")

    # Internal link suggestions: generate anchors for H2s and propose cross-links
    def to_anchor(heading: str) -> str:
        slug = re.sub(r"[^a-z0-9\s-]", "", heading.lower())
        return re.sub(r"\s+", "-", slug).strip('-')

    h2_headings = [ln[3:].strip() for ln in heading_lines if ln.startswith('## ')]
    anchors = [to_anchor(h) for h in h2_headings]
    # Pair every H2 with the one that follows it.
    internal_link_suggestions = [
        {
            "from": current,
            "to": following,
            "anchor": f"#{following_anchor}",
            "suggestion": f"Add internal link from '{current}' to '{following}'"
        }
        for current, following, following_anchor in zip(h2_headings, h2_headings[1:], anchors[1:])
    ]

    if external_links:
        citation_suggestions = []
    else:
        citation_suggestions = [{"suggestion": "Add external citation links for key claims."}]

    return BlogSEOAnalyzeResponse(
        success=True,
        seo_score=float(on_page_result.get("overall_score", 75)),
        density=density_map,
        structure={
            **on_page_result.get("heading_structure", {}),
            "markdown_headings": {"h1": h1, "h2": h2, "h3": h3},
            "links": {"total": len(md_links), "external": len(external_links)}
        },
        readability=on_page_result.get("content_analysis", {}),
        link_suggestions=citation_suggestions + internal_link_suggestions,
        image_alt_status=image_alt_status,
        recommendations=recommendations,
    )
|
||||
|
||||
async def seo_metadata(self, request: BlogSEOMetadataRequest) -> BlogSEOMetadataResponse:
    """Return placeholder SEO metadata built around the request title (stub).

    The request title is reused for the title option and the Open Graph
    title; fixed placeholder strings stand in when no title is given.
    """
    # TODO: Generate SEO metadata using existing services
    seo_title = request.title or "Generated SEO Title"
    og_title = request.title or "OG Title"
    return BlogSEOMetadataResponse(
        success=True,
        title_options=[seo_title],
        meta_descriptions=["Compelling meta description..."],
        open_graph={"title": og_title, "image": ""},
        twitter_card={"card": "summary_large_image"},
        schema={"@type": "Article"},
    )
|
||||
|
||||
async def publish(self, request: BlogPublishRequest) -> BlogPublishResponse:
    """Report a successful publish to a fixed example URL (stub; nothing is published)."""
    # TODO: Call Wix/WordPress adapters to publish
    target_platform = request.platform
    return BlogPublishResponse(
        success=True,
        platform=target_platform,
        url="https://example.com/post",
    )
|
||||
|
||||
|
||||
Binary file not shown.
@@ -43,7 +43,7 @@ class GeminiGroundedProvider:
|
||||
|
||||
# Initialize the Gemini client with timeout configuration
|
||||
self.client = genai.Client(api_key=self.api_key)
|
||||
self.timeout = 30 # 30 second timeout for API calls
|
||||
self.timeout = 60 # 60 second timeout for API calls (increased for research)
|
||||
logger.info("✅ Gemini Grounded Provider initialized with native Google Search grounding")
|
||||
|
||||
async def generate_grounded_content(
|
||||
@@ -239,8 +239,8 @@ class GeminiGroundedProvider:
|
||||
logger.info(f"Search queries: {grounding_metadata.web_search_queries}")
|
||||
|
||||
# Extract sources from grounding chunks
|
||||
sources = [] # Initialize sources list
|
||||
if hasattr(grounding_metadata, 'grounding_chunks') and grounding_metadata.grounding_chunks:
|
||||
sources = []
|
||||
for i, chunk in enumerate(grounding_metadata.grounding_chunks):
|
||||
logger.info(f"Chunk {i} attributes: {dir(chunk)}")
|
||||
if hasattr(chunk, 'web'):
|
||||
@@ -251,15 +251,29 @@ class GeminiGroundedProvider:
|
||||
'type': 'web'
|
||||
}
|
||||
sources.append(source)
|
||||
result['sources'] = sources
|
||||
logger.info(f"Extracted {len(sources)} sources")
|
||||
logger.info(f"Extracted {len(sources)} sources from grounding chunks")
|
||||
else:
|
||||
logger.error("❌ CRITICAL: No grounding chunks found in response")
|
||||
logger.error(f"Grounding metadata structure: {dir(grounding_metadata)}")
|
||||
if hasattr(grounding_metadata, 'grounding_chunks'):
|
||||
logger.error(f"Grounding chunks type: {type(grounding_metadata.grounding_chunks)}")
|
||||
logger.error(f"Grounding chunks value: {grounding_metadata.grounding_chunks}")
|
||||
raise ValueError("No grounding chunks found - grounding is not working properly")
|
||||
logger.warning("⚠️ No grounding chunks found - this is normal for some queries")
|
||||
logger.info(f"Grounding metadata available fields: {[attr for attr in dir(grounding_metadata) if not attr.startswith('_')]}")
|
||||
|
||||
# Check if we have search queries - this means Google Search was triggered
|
||||
if hasattr(grounding_metadata, 'web_search_queries') and grounding_metadata.web_search_queries:
|
||||
logger.info(f"✅ Google Search was triggered with {len(grounding_metadata.web_search_queries)} queries")
|
||||
# Create sources based on search queries
|
||||
for i, query in enumerate(grounding_metadata.web_search_queries[:5]): # Limit to 5 sources
|
||||
source = {
|
||||
'index': i,
|
||||
'title': f"Search: {query}",
|
||||
'url': f"https://www.google.com/search?q={query.replace(' ', '+')}",
|
||||
'type': 'search_query',
|
||||
'query': query
|
||||
}
|
||||
sources.append(source)
|
||||
logger.info(f"Created {len(sources)} sources from search queries")
|
||||
else:
|
||||
logger.warning("⚠️ No search queries found either - grounding may not have been triggered")
|
||||
|
||||
result['sources'] = sources
|
||||
|
||||
# Extract citations from grounding supports
|
||||
if hasattr(grounding_metadata, 'grounding_supports') and grounding_metadata.grounding_supports:
|
||||
@@ -278,12 +292,37 @@ class GeminiGroundedProvider:
|
||||
result['citations'] = citations
|
||||
logger.info(f"Extracted {len(citations)} citations")
|
||||
else:
|
||||
logger.error("❌ CRITICAL: No grounding supports found in response")
|
||||
logger.error(f"Grounding metadata structure: {dir(grounding_metadata)}")
|
||||
if hasattr(grounding_metadata, 'grounding_supports'):
|
||||
logger.error(f"Grounding supports type: {type(grounding_metadata.grounding_supports)}")
|
||||
logger.error(f"Grounding supports value: {grounding_metadata.grounding_supports}")
|
||||
raise ValueError("No grounding supports found - grounding is not working properly")
|
||||
logger.warning("⚠️ No grounding supports found - this is normal when no web sources are retrieved")
|
||||
# Create basic citations from the content if we have sources
|
||||
if sources:
|
||||
citations = []
|
||||
for i, source in enumerate(sources[:3]): # Limit to 3 citations
|
||||
citation = {
|
||||
'type': 'reference',
|
||||
'start_index': 0,
|
||||
'end_index': 0,
|
||||
'text': f"Source {i+1}",
|
||||
'source_indices': [i],
|
||||
'reference': f"Source {i+1}",
|
||||
'source': source
|
||||
}
|
||||
citations.append(citation)
|
||||
result['citations'] = citations
|
||||
logger.info(f"Created {len(citations)} basic citations from sources")
|
||||
else:
|
||||
result['citations'] = []
|
||||
logger.info("No citations created - no sources available")
|
||||
|
||||
# Extract search entry point for UI display
|
||||
if hasattr(grounding_metadata, 'search_entry_point') and grounding_metadata.search_entry_point:
|
||||
if hasattr(grounding_metadata.search_entry_point, 'rendered_content'):
|
||||
result['search_widget'] = grounding_metadata.search_entry_point.rendered_content
|
||||
logger.info("✅ Extracted search widget HTML for UI display")
|
||||
|
||||
# Extract search queries for reference
|
||||
if hasattr(grounding_metadata, 'web_search_queries') and grounding_metadata.web_search_queries:
|
||||
result['search_queries'] = grounding_metadata.web_search_queries
|
||||
logger.info(f"✅ Extracted {len(grounding_metadata.web_search_queries)} search queries")
|
||||
|
||||
logger.info(f"✅ Successfully extracted {len(result['sources'])} sources and {len(result['citations'])} citations from grounding metadata")
|
||||
logger.info(f"Sources: {result['sources']}")
|
||||
|
||||
@@ -389,43 +389,13 @@ def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9,
|
||||
config=generation_config,
|
||||
)
|
||||
|
||||
# Add debugging for response
|
||||
logger.info("Gemini response | type=%s | has_text=%s | has_parsed=%s",
|
||||
type(response), hasattr(response, 'text'), hasattr(response, 'parsed'))
|
||||
|
||||
if hasattr(response, 'text'):
|
||||
logger.info(f"Gemini response.text: {repr(response.text)}")
|
||||
if hasattr(response, 'parsed'):
|
||||
logger.info(f"Gemini response.parsed: {repr(response.parsed)}")
|
||||
|
||||
# According to the documentation, we should use response.parsed for structured output
|
||||
if hasattr(response, 'parsed') and response.parsed is not None:
|
||||
logger.info("Using response.parsed for structured output")
|
||||
return response.parsed
|
||||
|
||||
# Fallback to text if parsed is not available
|
||||
if hasattr(response, 'text') and response.text:
|
||||
logger.info("Falling back to response.text parsing")
|
||||
text = response.text.strip()
|
||||
|
||||
# Strip markdown code fences if present
|
||||
if text.startswith('```'):
|
||||
if text.lower().startswith('```json'):
|
||||
text = text[7:]
|
||||
else:
|
||||
text = text[3:]
|
||||
if text.endswith('```'):
|
||||
text = text[:-3]
|
||||
text = text.strip()
|
||||
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to parse response.text as JSON: {e}")
|
||||
return {"error": f"Failed to parse JSON response: {e}", "raw_response": text[:500]}
|
||||
|
||||
logger.error("No valid response content found")
|
||||
return {"error": "No valid response content found", "raw_response": ""}
|
||||
logger.error("No valid structured response content found")
|
||||
return {"error": "No valid structured response content found"}
|
||||
|
||||
except ValueError as e:
|
||||
# API key related errors
|
||||
|
||||
@@ -45,8 +45,7 @@ class GoogleSearchService:
|
||||
self.base_url = "https://www.googleapis.com/customsearch/v1"
|
||||
|
||||
if not self.api_key or not self.search_engine_id:
|
||||
logger.warning("Google Search API credentials not configured. Service will use fallback methods.")
|
||||
self.enabled = False
|
||||
raise ValueError("Google Search API credentials not configured. Please set GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables.")
|
||||
else:
|
||||
self.enabled = True
|
||||
logger.info("Google Search Service initialized successfully")
|
||||
@@ -69,8 +68,7 @@ class GoogleSearchService:
|
||||
List of search results with credibility scoring
|
||||
"""
|
||||
if not self.enabled:
|
||||
logger.warning("Google Search Service not enabled, using fallback research")
|
||||
return await self._fallback_research(topic, industry)
|
||||
raise RuntimeError("Google Search Service is not enabled. Please configure API credentials.")
|
||||
|
||||
try:
|
||||
# Construct industry-specific search query
|
||||
@@ -99,7 +97,7 @@ class GoogleSearchService:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Google search failed: {str(e)}")
|
||||
return await self._fallback_research(topic, industry)
|
||||
raise RuntimeError(f"Google search failed: {str(e)}")
|
||||
|
||||
def _build_search_query(self, topic: str, industry: str) -> str:
|
||||
"""
|
||||
@@ -465,45 +463,6 @@ class GoogleSearchService:
|
||||
"statistics": statistics
|
||||
}
|
||||
|
||||
async def _fallback_research(self, topic: str, industry: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Fallback research method when Google Search is not available.
|
||||
|
||||
Args:
|
||||
topic: The research topic
|
||||
industry: The industry context
|
||||
|
||||
Returns:
|
||||
Fallback research data
|
||||
"""
|
||||
logger.info(f"Using fallback research for {topic} in {industry}")
|
||||
|
||||
return {
|
||||
"sources": [
|
||||
{
|
||||
"title": f"Industry insights on {topic} in {industry}",
|
||||
"url": f"https://example.com/{topic.lower().replace(' ', '-')}",
|
||||
"content": f"Professional insights and trends related to {topic} in the {industry} sector...",
|
||||
"relevance_score": 0.8,
|
||||
"credibility_score": 0.6,
|
||||
"domain_authority": 0.5,
|
||||
"source_type": "general",
|
||||
"grounding_enabled": False
|
||||
}
|
||||
],
|
||||
"key_insights": [
|
||||
f"{topic} is transforming {industry} operations",
|
||||
f"Industry leaders are investing in {topic}",
|
||||
f"Expected growth in {topic} adoption within {industry}"
|
||||
],
|
||||
"statistics": [
|
||||
f"85% of {industry} companies are exploring {topic}",
|
||||
f"Investment in {topic} increased by 40% this year"
|
||||
],
|
||||
"grounding_enabled": False,
|
||||
"search_query": f"{topic} {industry} trends",
|
||||
"timestamp": datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
async def test_api_connection(self) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -513,11 +472,7 @@ class GoogleSearchService:
|
||||
Test results and status information
|
||||
"""
|
||||
if not self.enabled:
|
||||
return {
|
||||
"status": "disabled",
|
||||
"message": "Google Search API credentials not configured",
|
||||
"enabled": False
|
||||
}
|
||||
raise RuntimeError("Google Search Service is not enabled. Please configure API credentials.")
|
||||
|
||||
try:
|
||||
# Perform a simple test search
|
||||
|
||||
Reference in New Issue
Block a user